Merge BoringSSL through af561c221d3af70bd0aa48024db4f1fcf1988eef.

Brian Smith, 2022-10-31 15:28:01 -07:00, committed by GitHub
commit feeca747e2
6 changed files with 385 additions and 23 deletions


@@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -17,23 +17,31 @@
# GHASH for ARMv8 Crypto Extension, 64-bit polynomial multiplication.
#
# June 2014
# Initial version was developed in tight cooperation with Ard Biesheuvel
# of Linaro from bits-n-pieces from other assembly modules. Just like
# aesv8-armx.pl this module supports both AArch32 and AArch64 execution modes.
#
# Initial version was developed in tight cooperation with Ard
# Biesheuvel of Linaro from bits-n-pieces from other assembly modules.
# Just like aesv8-armx.pl this module supports both AArch32 and
# AArch64 execution modes.
#
# July 2014
#
# Implement 2x aggregated reduction [see ghash-x86.pl for background
# information].
#
# November 2017
#
# AArch64 register bank to "accommodate" 4x aggregated reduction and
# improve performance by 20-70% depending on processor.
#
# Current performance in cycles per processed byte:
#
# PMULL[2] 32-bit NEON(*)
# Apple A7 0.92 5.62
# Cortex-A53 1.01 8.39
# Cortex-A57 1.17 7.61
# Denver 0.71 6.02
# Mongoose 1.10 8.06
# Kryo 1.16 8.00
# 64-bit PMULL 32-bit PMULL 32-bit NEON(*)
# Apple A7 0.58 0.92 5.62
# Cortex-A53 0.85 1.01 8.39
# Cortex-A57 0.73 1.17 7.61
# Denver 0.51 0.65 6.02
# Mongoose 0.65 1.10 8.06
# Kryo 0.76 1.16 8.00
#
# (*) presented for reference/comparison purposes;
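For context on the "4x aggregated reduction" described above: GHASH is a polynomial evaluation over GF(2^128), and aggregation replaces the per-block Horner step (multiply by H, reduce, repeat) with one multiply per block against a precomputed power of H plus a single reduction per group of four. A minimal software model of that identity, assuming an illustrative gf128_mul helper that uses the field polynomial in plain little-endian bit order rather than GHASH's reflected encoding:

// Illustrative GF(2^128) multiplication modulo x^128 + x^7 + x^2 + x + 1, with
// bit i of the u128 holding the coefficient of x^i. GHASH's real encoding is
// bit-reflected, but that does not affect the aggregation identity below.
fn gf128_mul(mut a: u128, b: u128) -> u128 {
    let mut r = 0u128;
    for i in 0..128 {
        if (b >> i) & 1 == 1 {
            r ^= a;
        }
        let carry = a >> 127;
        a <<= 1;
        if carry == 1 {
            a ^= 0x87; // x^128 == x^7 + x^2 + x + 1 modulo the field polynomial
        }
    }
    r
}

// One block at a time (Horner form): X <- (X ^ I[i]) * H, reducing every block.
fn ghash_1x(mut x: u128, h: u128, blocks: &[u128]) -> u128 {
    for &b in blocks {
        x = gf128_mul(x ^ b, h);
    }
    x
}

// 4x aggregated form: (X ^ I0)*H^4 ^ I1*H^3 ^ I2*H^2 ^ I3*H equals four Horner
// steps, so the assembly can accumulate four unreduced products and pay for
// the modular reduction once per 64 bytes of input.
fn ghash_4x(x: u128, h: u128, quads: &[[u128; 4]]) -> u128 {
    let h2 = gf128_mul(h, h);
    let h3 = gf128_mul(h2, h);
    let h4 = gf128_mul(h2, h2);
    quads.iter().fold(x, |acc, q| {
        gf128_mul(acc ^ q[0], h4)
            ^ gf128_mul(q[1], h3)
            ^ gf128_mul(q[2], h2)
            ^ gf128_mul(q[3], h)
    })
}

The per-block work is still one multiplication, but the reduction is amortized across four blocks and the four products become independent, which is the effect the November 2017 note above attributes the 20-70% improvement to.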
@@ -62,6 +70,7 @@ my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
$code=<<___;
#include <ring-core/arm_arch.h>
#if __ARM_MAX_ARCH__>=7
.text
___
$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
@@ -129,8 +138,56 @@ gcm_init_clmul:
vext.8 $t1,$H2,$H2,#8 @ Karatsuba pre-processing
veor $t1,$t1,$H2
vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
vst1.64 {$Hhl-$H2},[x0] @ store Htable[1..2]
vst1.64 {$Hhl-$H2},[x0],#32 @ store Htable[1..2]
___
if ($flavour =~ /64/) {
my ($t3,$Yl,$Ym,$Yh) = map("q$_",(4..7));
$code.=<<___;
@ calculate H^3 and H^4
vpmull.p64 $Xl,$H, $H2
vpmull.p64 $Yl,$H2,$H2
vpmull2.p64 $Xh,$H, $H2
vpmull2.p64 $Yh,$H2,$H2
vpmull.p64 $Xm,$t0,$t1
vpmull.p64 $Ym,$t1,$t1
vext.8 $t0,$Xl,$Xh,#8 @ Karatsuba post-processing
vext.8 $t1,$Yl,$Yh,#8
veor $t2,$Xl,$Xh
veor $Xm,$Xm,$t0
veor $t3,$Yl,$Yh
veor $Ym,$Ym,$t1
veor $Xm,$Xm,$t2
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase
veor $Ym,$Ym,$t3
vpmull.p64 $t3,$Yl,$xC2
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Yh#lo,$Ym#hi
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
vmov $Ym#hi,$Yl#lo
veor $Xl,$Xm,$t2
veor $Yl,$Ym,$t3
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase
vext.8 $t3,$Yl,$Yl,#8
vpmull.p64 $Xl,$Xl,$xC2
vpmull.p64 $Yl,$Yl,$xC2
veor $t2,$t2,$Xh
veor $t3,$t3,$Yh
veor $H, $Xl,$t2 @ H^3
veor $H2,$Yl,$t3 @ H^4
vext.8 $t0,$H, $H,#8 @ Karatsuba pre-processing
vext.8 $t1,$H2,$H2,#8
veor $t0,$t0,$H
veor $t1,$t1,$H2
vext.8 $Hhl,$t0,$t1,#8 @ pack Karatsuba pre-processed
vst1.64 {$H-$H2},[x0] @ store Htable[3..5]
___
}
$code.=<<___;
ret
.size gcm_init_clmul,.-gcm_init_clmul
___
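The "Karatsuba pre-processing" comments above refer to storing H_hi ^ H_lo in Htable alongside each power of H, so the middle Karatsuba term of every 128x128 carry-less multiply needs only one PMULL per block. A sketch of the split those packed values serve, with a software clmul64 standing in for PMULL (this is not the real Htable encoding, which also "twists" H by the reduction constant):

// Software stand-in for the 64x64-bit PMULL carry-less multiply.
fn clmul64(a: u64, b: u64) -> u128 {
    let mut r = 0u128;
    for i in 0..64 {
        if (b >> i) & 1 == 1 {
            r ^= (a as u128) << i;
        }
    }
    r
}

// Karatsuba 128x128 carry-less multiply: three clmul64 calls instead of four.
// The (ah ^ al) operand is what the assembly precomputes and stores in Htable
// next to each power of H ("pack Karatsuba pre-processed"), so only the
// message-side XOR remains in the per-block path.
fn clmul128_karatsuba(a: u128, b: u128) -> (u128, u128) {
    let (ah, al) = ((a >> 64) as u64, a as u64);
    let (bh, bl) = ((b >> 64) as u64, b as u64);
    let lo = clmul64(al, bl);                      // roughly Xl in the assembly
    let hi = clmul64(ah, bh);                      // roughly Xh
    let mid = clmul64(ah ^ al, bh ^ bl) ^ lo ^ hi; // roughly Xm after post-processing
    // 256-bit product hi*x^128 ^ mid*x^64 ^ lo, returned as (high, low) halves.
    (hi ^ (mid >> 64), lo ^ (mid << 64))
}

Karatsuba trades the fourth 64x64 multiply for a handful of XORs; folding the H-side XOR into key setup keeps the per-block cost at three PMULLs plus one message-side XOR.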
@@ -202,6 +259,10 @@ $code.=<<___;
gcm_ghash_clmul:
AARCH64_VALID_CALL_TARGET
___
$code.=<<___ if ($flavour =~ /64/);
cmp $len,#64
b.hs .Lgcm_ghash_v8_4x
___
$code.=<<___ if ($flavour !~ /64/);
vstmdb sp!,{d8-d15} @ 32-bit ABI says so
___
@@ -349,10 +410,301 @@ $code.=<<___;
ret
.size gcm_ghash_clmul,.-gcm_ghash_clmul
___
if ($flavour =~ /64/) { # 4x subroutine
my ($I0,$j1,$j2,$j3,
$I1,$I2,$I3,$H3,$H34,$H4,$Yl,$Ym,$Yh) = map("q$_",(4..7,15..23));
$code.=<<___;
.type gcm_ghash_v8_4x,%function
.align 4
gcm_ghash_v8_4x:
.Lgcm_ghash_v8_4x:
vld1.64 {$Xl},[$Xi] @ load [rotated] Xi
vld1.64 {$H-$H2},[$Htbl],#48 @ load twisted H, ..., H^2
vmov.i8 $xC2,#0xe1
vld1.64 {$H3-$H4},[$Htbl] @ load twisted H^3, ..., H^4
vshl.u64 $xC2,$xC2,#57 @ compose 0xc2.0 constant
vld1.64 {$I0-$j3},[$inp],#64
#ifndef __ARMEB__
vrev64.8 $Xl,$Xl
vrev64.8 $j1,$j1
vrev64.8 $j2,$j2
vrev64.8 $j3,$j3
vrev64.8 $I0,$I0
#endif
vext.8 $I3,$j3,$j3,#8
vext.8 $I2,$j2,$j2,#8
vext.8 $I1,$j1,$j1,#8
vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
veor $j3,$j3,$I3
vpmull2.p64 $Yh,$H,$I3
vpmull.p64 $Ym,$Hhl,$j3
vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
veor $j2,$j2,$I2
vpmull2.p64 $I2,$H2,$I2
vpmull2.p64 $j2,$Hhl,$j2
veor $Yl,$Yl,$t0
veor $Yh,$Yh,$I2
veor $Ym,$Ym,$j2
vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
veor $j1,$j1,$I1
vpmull2.p64 $I1,$H3,$I1
vpmull.p64 $j1,$H34,$j1
veor $Yl,$Yl,$j3
veor $Yh,$Yh,$I1
veor $Ym,$Ym,$j1
subs $len,$len,#128
b.lo .Ltail4x
b .Loop4x
.align 4
.Loop4x:
veor $t0,$I0,$Xl
vld1.64 {$I0-$j3},[$inp],#64
vext.8 $IN,$t0,$t0,#8
#ifndef __ARMEB__
vrev64.8 $j1,$j1
vrev64.8 $j2,$j2
vrev64.8 $j3,$j3
vrev64.8 $I0,$I0
#endif
vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
veor $t0,$t0,$IN
vpmull2.p64 $Xh,$H4,$IN
vext.8 $I3,$j3,$j3,#8
vpmull2.p64 $Xm,$H34,$t0
veor $Xl,$Xl,$Yl
veor $Xh,$Xh,$Yh
vext.8 $I2,$j2,$j2,#8
veor $Xm,$Xm,$Ym
vext.8 $I1,$j1,$j1,#8
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
vpmull.p64 $Yl,$H,$I3 @ H·Ii+3
veor $j3,$j3,$I3
veor $Xm,$Xm,$t1
vpmull2.p64 $Yh,$H,$I3
veor $Xm,$Xm,$t2
vpmull.p64 $Ym,$Hhl,$j3
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
vpmull.p64 $t0,$H2,$I2 @ H^2·Ii+2
veor $j2,$j2,$I2
vpmull2.p64 $I2,$H2,$I2
veor $Xl,$Xm,$t2
vpmull2.p64 $j2,$Hhl,$j2
veor $Yl,$Yl,$t0
veor $Yh,$Yh,$I2
veor $Ym,$Ym,$j2
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
vpmull.p64 $Xl,$Xl,$xC2
vpmull.p64 $j3,$H3,$I1 @ H^3·Ii+1
veor $j1,$j1,$I1
veor $t2,$t2,$Xh
vpmull2.p64 $I1,$H3,$I1
vpmull.p64 $j1,$H34,$j1
veor $Xl,$Xl,$t2
veor $Yl,$Yl,$j3
veor $Yh,$Yh,$I1
vext.8 $Xl,$Xl,$Xl,#8
veor $Ym,$Ym,$j1
subs $len,$len,#64
b.hs .Loop4x
.Ltail4x:
veor $t0,$I0,$Xl
vext.8 $IN,$t0,$t0,#8
vpmull.p64 $Xl,$H4,$IN @ H^4·(Xi+Ii)
veor $t0,$t0,$IN
vpmull2.p64 $Xh,$H4,$IN
vpmull2.p64 $Xm,$H34,$t0
veor $Xl,$Xl,$Yl
veor $Xh,$Xh,$Yh
veor $Xm,$Xm,$Ym
adds $len,$len,#64
b.eq .Ldone4x
cmp $len,#32
b.lo .Lone
b.eq .Ltwo
.Lthree:
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
veor $Xm,$Xm,$t1
vld1.64 {$I0-$j2},[$inp]
veor $Xm,$Xm,$t2
#ifndef __ARMEB__
vrev64.8 $j1,$j1
vrev64.8 $j2,$j2
vrev64.8 $I0,$I0
#endif
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
vext.8 $I2,$j2,$j2,#8
vext.8 $I1,$j1,$j1,#8
veor $Xl,$Xm,$t2
vpmull.p64 $Yl,$H,$I2 @ H·Ii+2
veor $j2,$j2,$I2
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
vpmull.p64 $Xl,$Xl,$xC2
veor $t2,$t2,$Xh
vpmull2.p64 $Yh,$H,$I2
vpmull.p64 $Ym,$Hhl,$j2
veor $Xl,$Xl,$t2
vpmull.p64 $j3,$H2,$I1 @ H^2·Ii+1
veor $j1,$j1,$I1
vext.8 $Xl,$Xl,$Xl,#8
vpmull2.p64 $I1,$H2,$I1
veor $t0,$I0,$Xl
vpmull2.p64 $j1,$Hhl,$j1
vext.8 $IN,$t0,$t0,#8
veor $Yl,$Yl,$j3
veor $Yh,$Yh,$I1
veor $Ym,$Ym,$j1
vpmull.p64 $Xl,$H3,$IN @ H^3·(Xi+Ii)
veor $t0,$t0,$IN
vpmull2.p64 $Xh,$H3,$IN
vpmull.p64 $Xm,$H34,$t0
veor $Xl,$Xl,$Yl
veor $Xh,$Xh,$Yh
veor $Xm,$Xm,$Ym
b .Ldone4x
.align 4
.Ltwo:
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
veor $Xm,$Xm,$t1
vld1.64 {$I0-$j1},[$inp]
veor $Xm,$Xm,$t2
#ifndef __ARMEB__
vrev64.8 $j1,$j1
vrev64.8 $I0,$I0
#endif
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
vext.8 $I1,$j1,$j1,#8
veor $Xl,$Xm,$t2
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
vpmull.p64 $Xl,$Xl,$xC2
veor $t2,$t2,$Xh
veor $Xl,$Xl,$t2
vext.8 $Xl,$Xl,$Xl,#8
vpmull.p64 $Yl,$H,$I1 @ H·Ii+1
veor $j1,$j1,$I1
veor $t0,$I0,$Xl
vext.8 $IN,$t0,$t0,#8
vpmull2.p64 $Yh,$H,$I1
vpmull.p64 $Ym,$Hhl,$j1
vpmull.p64 $Xl,$H2,$IN @ H^2·(Xi+Ii)
veor $t0,$t0,$IN
vpmull2.p64 $Xh,$H2,$IN
vpmull2.p64 $Xm,$Hhl,$t0
veor $Xl,$Xl,$Yl
veor $Xh,$Xh,$Yh
veor $Xm,$Xm,$Ym
b .Ldone4x
.align 4
.Lone:
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
veor $Xm,$Xm,$t1
vld1.64 {$I0},[$inp]
veor $Xm,$Xm,$t2
#ifndef __ARMEB__
vrev64.8 $I0,$I0
#endif
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
veor $Xl,$Xm,$t2
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
vpmull.p64 $Xl,$Xl,$xC2
veor $t2,$t2,$Xh
veor $Xl,$Xl,$t2
vext.8 $Xl,$Xl,$Xl,#8
veor $t0,$I0,$Xl
vext.8 $IN,$t0,$t0,#8
vpmull.p64 $Xl,$H,$IN
veor $t0,$t0,$IN
vpmull2.p64 $Xh,$H,$IN
vpmull.p64 $Xm,$Hhl,$t0
.Ldone4x:
vext.8 $t1,$Xl,$Xh,#8 @ Karatsuba post-processing
veor $t2,$Xl,$Xh
veor $Xm,$Xm,$t1
veor $Xm,$Xm,$t2
vpmull.p64 $t2,$Xl,$xC2 @ 1st phase of reduction
vmov $Xh#lo,$Xm#hi @ Xh|Xm - 256-bit result
vmov $Xm#hi,$Xl#lo @ Xm is rotated Xl
veor $Xl,$Xm,$t2
vext.8 $t2,$Xl,$Xl,#8 @ 2nd phase of reduction
vpmull.p64 $Xl,$Xl,$xC2
veor $t2,$t2,$Xh
veor $Xl,$Xl,$t2
vext.8 $Xl,$Xl,$Xl,#8
#ifndef __ARMEB__
vrev64.8 $Xl,$Xl
#endif
vst1.64 {$Xl},[$Xi] @ write out Xi
ret
.size gcm_ghash_v8_4x,.-gcm_ghash_v8_4x
___
}
}
$code.=<<___;
.asciz "GHASH for ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
#endif
___
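Taken together, the new path works as follows: gcm_ghash_clmul branches to .Lgcm_ghash_v8_4x whenever at least 64 bytes remain, .Loop4x then consumes four blocks per iteration with a single reduction, and .Lthree/.Ltwo/.Lone fold a leftover 1-3 blocks in against the matching lower powers of H. A rough control-flow model, reusing the illustrative gf128_mul from the first sketch and ignoring the byte reversal and register allocation of the real code:

// Rough model only: the real routine works on byte strings in byte-reversed
// NEON registers; here each 16-byte block is simply a u128.
fn ghash_v8_4x_model(mut x: u128, h: u128, blocks: &[u128]) -> u128 {
    let h2 = gf128_mul(h, h);
    let h3 = gf128_mul(h2, h);
    let h4 = gf128_mul(h2, h2); // the powers gcm_init_clmul now precomputes
    let mut i = 0;
    // .Loop4x: four blocks per trip, a single reduction each time around.
    while blocks.len() - i >= 4 {
        x = gf128_mul(x ^ blocks[i], h4)
            ^ gf128_mul(blocks[i + 1], h3)
            ^ gf128_mul(blocks[i + 2], h2)
            ^ gf128_mul(blocks[i + 3], h);
        i += 4;
    }
    // .Lthree / .Ltwo / .Lone: leftover blocks are folded in with H^3, H^2, H^1.
    let tail = &blocks[i..];
    match tail.len() {
        3 => gf128_mul(x ^ tail[0], h3) ^ gf128_mul(tail[1], h2) ^ gf128_mul(tail[2], h),
        2 => gf128_mul(x ^ tail[0], h2) ^ gf128_mul(tail[1], h),
        1 => gf128_mul(x ^ tail[0], h),
        _ => x,
    }
}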
if ($flavour =~ /64/) { ######## 64-bit code
@@ -360,7 +712,8 @@ if ($flavour =~ /64/) { ######## 64-bit code
my $arg=shift;
$arg =~ m/q([0-9]+)#(lo|hi),\s*q([0-9]+)#(lo|hi)/o &&
sprintf "ins v%d.d[%d],v%d.d[%d]",$1,($2 eq "lo")?0:1,$3,($4 eq "lo")?0:1;
sprintf "ins v%d.d[%d],v%d.d[%d]",$1<8?$1:$1+8,($2 eq "lo")?0:1,
$3<8?$3:$3+8,($4 eq "lo")?0:1;
}
foreach(split("\n",$code)) {
s/cclr\s+([wx])([^,]+),\s*([a-z]+)/csel $1$2,$1zr,$1$2,$3/o or


@@ -1,5 +1,5 @@
#! /usr/bin/env perl
# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
# Copyright 2014-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the OpenSSL license (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
@@ -27,6 +27,7 @@
# Denver 2.01 10.5 (+26%) 6.70 (+8%)
# X-Gene 20.0 (+100%) 12.8 (+300%(***))
# Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
# Kryo 1.92 17.4 (+30%) 11.2 (+8%)
#
# (*) Software SHA256 results are of lesser relevance, presented
# mostly for informational purposes.
@@ -35,7 +36,7 @@
# on Cortex-A53 (or by 4 cycles per round).
# (***) Super-impressive coefficients over gcc-generated code are
# indication of some compiler "pathology", most notably code
# generated with -mgeneral-regs-only is significanty faster
# generated with -mgeneral-regs-only is significantly faster
# and the gap is only 40-90%.
$output=pop;
@@ -89,7 +90,7 @@ my ($T0,$T1,$T2)=(@X[($i-8)&15],@X[($i-9)&15],@X[($i-10)&15]);
$T0=@X[$i+3] if ($i<11);
$code.=<<___ if ($i<16);
#ifndef __ARMEB__
#ifndef __AARCH64EB__
rev @X[$i],@X[$i] // $i
#endif
___
@@ -449,12 +450,15 @@ close SELF;
foreach(split("\n",$code)) {
s/\`([^\`]*)\`/eval($1)/geo;
s/\`([^\`]*)\`/eval($1)/ge;
s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/geo;
s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;
s/\.\w?32\b//o and s/\.16b/\.4s/go;
m/(ld|st)1[^\[]+\[0\]/o and s/\.4s/\.s/go;
s/\bq([0-9]+)\b/v$1.16b/g; # old->new registers
s/\.[ui]?8(\s)/$1/;
s/\.\w?32\b// and s/\.16b/\.4s/g;
m/(ld|st)1[^\[]+\[0\]/ and s/\.4s/\.s/g;
print $_,"\n";
}


@@ -1136,7 +1136,7 @@ my $endbranch = sub {
########################################################################
{
my $comment = "#";
my $comment = "//";
$comment = ";" if ($masm || $nasm);
print <<___;
$comment This file is generated from a similarly-named Perl script in the BoringSSL


@@ -275,7 +275,7 @@ sub ::asciz
sub ::asm_finish
{ &file_end();
my $comment = "#";
my $comment = "//";
$comment = ";" if ($win32);
print <<___;
$comment This file is generated from a similarly-named Perl script in the BoringSSL


@@ -138,7 +138,7 @@ fn gcm_mul64_nohw(a: u64, b: u64) -> (u64, u64) {
}
pub(super) fn init(xi: [u64; 2]) -> super::u128 {
// We implement GHASH in terms of POLYVAL, as described in RFC8452. This
// We implement GHASH in terms of POLYVAL, as described in RFC 8452. This
// avoids a shift by 1 in the multiplication, needed to account for bit
// reversal losing a bit after multiplication, that is,
// rev128(X) * rev128(Y) = rev255(X*Y).
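The cited identity can be checked directly: bit-reversing both 128-bit operands of a carry-less multiply bit-reverses their 255-bit product, so a natural-order multiplier fed GHASH's reflected operands comes out one bit short of a 256-bit result and would need the shift by 1 the comment mentions; converting H once at init time (RFC 8452's mulX mapping) absorbs that shift into key setup. A small self-contained check, with helper names invented for the sketch:

// Carry-less multiply of two 128-bit polynomials into a 256-bit product,
// returned as (high, low) u128 halves. Illustrative software only.
fn clmul128(a: u128, b: u128) -> (u128, u128) {
    let (mut hi, mut lo) = (0u128, 0u128);
    for i in 0..128 {
        if (b >> i) & 1 == 1 {
            lo ^= a << i;
            if i > 0 {
                hi ^= a >> (128 - i);
            }
        }
    }
    (hi, lo)
}

fn rev128(x: u128) -> u128 {
    x.reverse_bits()
}

// Reverse a 255-bit value held in a 256-bit (high, low) pair: bit k -> bit 254-k.
fn rev255(hi: u128, lo: u128) -> (u128, u128) {
    let (rhi, rlo) = (lo.reverse_bits(), hi.reverse_bits()); // full 256-bit reversal
    (rhi >> 1, (rlo >> 1) | (rhi << 127))                    // then shift right by one
}

fn main() {
    let x: u128 = 0x0123_4567_89ab_cdef_0011_2233_4455_6677;
    let y: u128 = 0xfedc_ba98_7654_3210_8899_aabb_ccdd_eeff;
    let (ph, pl) = clmul128(x, y);
    // rev128(X) * rev128(Y) == rev255(X * Y)
    assert_eq!(clmul128(rev128(x), rev128(y)), rev255(ph, pl));
}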


@@ -10,6 +10,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F10111213
Output = 4C99FF0CB1B31BD33F8431DBAF4D17FCD356A807
# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA1
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -25,6 +26,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B
Output = E3D249A8CFB67EF8B7A169E9A0A599714A2CECBA65999A51BEB8FBBE
# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA224
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -40,6 +42,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F
Output = A28CF43130EE696A98F14A37678B56BCFCBDD9E5CF69717FECF5480F0EBDF790
# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA256
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F60616263
@@ -55,6 +58,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F
Output = 6EB242BDBB582CA17BEBFA481B1E23211464D2B7F8C20B9FF2201637B93646AF5AE9AC316E98DB45D9CAE773675EEED0
# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA384
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
@@ -70,6 +74,7 @@ Input = "Sample message for keylen<blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F
Output = FD44C18BDA0BB0A6CE0E82B031BF2818F6539BD56EC00BDC10A8A2D730B3634DE2545D639B0F2CF710D0692C72A1896F1F211C2B922D1A96C392E07E7EA9FEDC
# This is actually keylen>blocklen, but the NIST test vectors have a misleading input.
HMAC = SHA512
Input = "Sample message for keylen=blocklen"
Key = 000102030405060708090A0B0C0D0E0F101112131415161718191A1B1C1D1E1F202122232425262728292A2B2C2D2E2F303132333435363738393A3B3C3D3E3F404142434445464748494A4B4C4D4E4F505152535455565758595A5B5C5D5E5F606162636465666768696A6B6C6D6E6F707172737475767778797A7B7C7D7E7F808182838485868788898A8B8C8D8E8F909192939495969798999A9B9C9D9E9FA0A1A2A3A4A5A6A7A8A9AAABACADAEAFB0B1B2B3B4B5B6B7B8B9BABBBCBDBEBFC0C1C2C3C4C5C6C7
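The note repeated in this file ("this is actually keylen>blocklen") refers to HMAC's standard key handling: a key longer than the hash's block size is first replaced by its digest before zero-padding, so the NIST labels based on the literal key length are misleading. A minimal sketch of that normalization step per RFC 2104; the Digest trait here is a placeholder for illustration, not this crate's API:

// Hypothetical stand-in for a concrete hash function's interface.
trait Digest {
    const BLOCK_LEN: usize;
    fn hash(data: &[u8]) -> Vec<u8>;
}

fn normalize_hmac_key<D: Digest>(key: &[u8]) -> Vec<u8> {
    let mut k = if key.len() > D::BLOCK_LEN {
        D::hash(key) // keylen > blocklen: replace the key with its digest
    } else {
        key.to_vec()
    };
    k.resize(D::BLOCK_LEN, 0); // then zero-pad to exactly one block
    k
}

For example, the SHA-1 vector above labeled "keylen=blocklen" actually has a 100-byte key against SHA-1's 64-byte block, so the key is hashed down to 20 bytes before the usual ipad/opad processing.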