Use C instead of assembly fallback code in GCM on X86_64.

This ensures that the C fallback code is tested in CI and is compiled by MSVC. Previously, the C code was not tested at all, because every platform we use for testing took a different code path.
2019-01-18 12:23:36 -10:00 · 2019-01-18 12:23:36 -10:00 · 0cd9bf6f64
commit 0cd9bf6f64
parent 225f6b0c3a
2 changed files with 1 additions and 231 deletions
--- a/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
+++ b/crypto/fipsmodule/modes/asm/ghash-x86_64.pl
@@ -222,220 +222,6 @@ ___
 $code=<<___;
 .text
 .extern	GFp_ia32cap_P
-
-.globl	GFp_gcm_gmult_4bit
-.type	GFp_gcm_gmult_4bit,\@function,2
-.align	16
-GFp_gcm_gmult_4bit:
-.cfi_startproc
-	push	%rbx
-.cfi_push	%rbx
-	push	%rbp		# %rbp and others are pushed exclusively in
-.cfi_push	%rbp
-	push	%r12		# order to reuse Win64 exception handler...
-.cfi_push	%r12
-	push	%r13
-.cfi_push	%r13
-	push	%r14
-.cfi_push	%r14
-	push	%r15
-.cfi_push	%r15
-	sub	\$280,%rsp
-.cfi_adjust_cfa_offset	280
-.Lgmult_prologue:
-
-	movzb	15($Xi),$Zlo
-	lea	.Lrem_4bit(%rip),$rem_4bit
-___
-	&loop	($Xi);
-$code.=<<___;
-	mov	$Zlo,8($Xi)
-	mov	$Zhi,($Xi)
-
-	lea	280+48(%rsp),%rsi
-.cfi_def_cfa	%rsi,8
-	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
-	lea	(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
-.Lgmult_epilogue:
-	ret
-.cfi_endproc
-.size	GFp_gcm_gmult_4bit,.-GFp_gcm_gmult_4bit
-___
-
-# per-function register layout
-$inp="%rdx";
-$len="%rcx";
-$rem_8bit=$rem_4bit;
-
-$code.=<<___;
-.globl	GFp_gcm_ghash_4bit
-.type	GFp_gcm_ghash_4bit,\@function,4
-.align	16
-GFp_gcm_ghash_4bit:
-.cfi_startproc
-	push	%rbx
-.cfi_push	%rbx
-	push	%rbp
-.cfi_push	%rbp
-	push	%r12
-.cfi_push	%r12
-	push	%r13
-.cfi_push	%r13
-	push	%r14
-.cfi_push	%r14
-	push	%r15
-.cfi_push	%r15
-	sub	\$280,%rsp
-.cfi_adjust_cfa_offset	280
-.Lghash_prologue:
-	mov	$inp,%r14		# reassign couple of args
-	mov	$len,%r15
-___
-{ my $inp="%r14";
-  my $dat="%edx";
-  my $len="%r15";
-  my @nhi=("%ebx","%ecx");
-  my @rem=("%r12","%r13");
-  my $Hshr4="%rbp";
-
-	&sub	($Htbl,-128);		# size optimization
-	&lea	($Hshr4,"16+128(%rsp)");
-	{ my @lo =($nlo,$nhi);
-          my @hi =($Zlo,$Zhi);
-
-	  &xor	($dat,$dat);
-	  for ($i=0,$j=-2;$i<18;$i++,$j++) {
-	    &mov	("$j(%rsp)",&LB($dat))		if ($i>1);
-	    &or		($lo[0],$tmp)			if ($i>1);
-	    &mov	(&LB($dat),&LB($lo[1]))		if ($i>0 && $i<17);
-	    &shr	($lo[1],4)			if ($i>0 && $i<17);
-	    &mov	($tmp,$hi[1])			if ($i>0 && $i<17);
-	    &shr	($hi[1],4)			if ($i>0 && $i<17);
-	    &mov	("8*$j($Hshr4)",$hi[0])		if ($i>1);
-	    &mov	($hi[0],"16*$i+0-128($Htbl)")	if ($i<16);
-	    &shl	(&LB($dat),4)			if ($i>0 && $i<17);
-	    &mov	("8*$j-128($Hshr4)",$lo[0])	if ($i>1);
-	    &mov	($lo[0],"16*$i+8-128($Htbl)")	if ($i<16);
-	    &shl	($tmp,60)			if ($i>0 && $i<17);
-
-	    push	(@lo,shift(@lo));
-	    push	(@hi,shift(@hi));
-	  }
-	}
-	&add	($Htbl,-128);
-	&mov	($Zlo,"8($Xi)");
-	&mov	($Zhi,"0($Xi)");
-	&add	($len,$inp);		# pointer to the end of data
-	&lea	($rem_8bit,".Lrem_8bit(%rip)");
-	&jmp	(".Louter_loop");
-
-$code.=".align	16\n.Louter_loop:\n";
-	&xor	($Zhi,"($inp)");
-	&mov	("%rdx","8($inp)");
-	&lea	($inp,"16($inp)");
-	&xor	("%rdx",$Zlo);
-	&mov	("($Xi)",$Zhi);
-	&mov	("8($Xi)","%rdx");
-	&shr	("%rdx",32);
-
-	&xor	($nlo,$nlo);
-	&rol	($dat,8);
-	&mov	(&LB($nlo),&LB($dat));
-	&movz	($nhi[0],&LB($dat));
-	&shl	(&LB($nlo),4);
-	&shr	($nhi[0],4);
-
-	for ($j=11,$i=0;$i<15;$i++) {
-	    &rol	($dat,8);
-	    &xor	($Zlo,"8($Htbl,$nlo)")			if ($i>0);
-	    &xor	($Zhi,"($Htbl,$nlo)")			if ($i>0);
-	    &mov	($Zlo,"8($Htbl,$nlo)")			if ($i==0);
-	    &mov	($Zhi,"($Htbl,$nlo)")			if ($i==0);
-
-	    &mov	(&LB($nlo),&LB($dat));
-	    &xor	($Zlo,$tmp)				if ($i>0);
-	    &movzw	($rem[1],"($rem_8bit,$rem[1],2)")	if ($i>0);
-
-	    &movz	($nhi[1],&LB($dat));
-	    &shl	(&LB($nlo),4);
-	    &movzb	($rem[0],"(%rsp,$nhi[0])");
-
-	    &shr	($nhi[1],4)				if ($i<14);
-	    &and	($nhi[1],0xf0)				if ($i==14);
-	    &shl	($rem[1],48)				if ($i>0);
-	    &xor	($rem[0],$Zlo);
-
-	    &mov	($tmp,$Zhi);
-	    &xor	($Zhi,$rem[1])				if ($i>0);
-	    &shr	($Zlo,8);
-
-	    &movz	($rem[0],&LB($rem[0]));
-	    &mov	($dat,"$j($Xi)")			if (--$j%4==0);
-	    &shr	($Zhi,8);
-
-	    &xor	($Zlo,"-128($Hshr4,$nhi[0],8)");
-	    &shl	($tmp,56);
-	    &xor	($Zhi,"($Hshr4,$nhi[0],8)");
-
-	    unshift	(@nhi,pop(@nhi));		# "rotate" registers
-	    unshift	(@rem,pop(@rem));
-	}
-	&movzw	($rem[1],"($rem_8bit,$rem[1],2)");
-	&xor	($Zlo,"8($Htbl,$nlo)");
-	&xor	($Zhi,"($Htbl,$nlo)");
-
-	&shl	($rem[1],48);
-	&xor	($Zlo,$tmp);
-
-	&xor	($Zhi,$rem[1]);
-	&movz	($rem[0],&LB($Zlo));
-	&shr	($Zlo,4);
-
-	&mov	($tmp,$Zhi);
-	&shl	(&LB($rem[0]),4);
-	&shr	($Zhi,4);
-
-	&xor	($Zlo,"8($Htbl,$nhi[0])");
-	&movzw	($rem[0],"($rem_8bit,$rem[0],2)");
-	&shl	($tmp,60);
-
-	&xor	($Zhi,"($Htbl,$nhi[0])");
-	&xor	($Zlo,$tmp);
-	&shl	($rem[0],48);
-
-	&bswap	($Zlo);
-	&xor	($Zhi,$rem[0]);
-
-	&bswap	($Zhi);
-	&cmp	($inp,$len);
-	&jb	(".Louter_loop");
-}
-$code.=<<___;
-	mov	$Zlo,8($Xi)
-	mov	$Zhi,($Xi)
-
-	lea	280+48(%rsp),%rsi
-.cfi_def_cfa	%rsi,8
-	mov	-48(%rsi),%r15
-.cfi_restore	%r15
-	mov	-40(%rsi),%r14
-.cfi_restore	%r14
-	mov	-32(%rsi),%r13
-.cfi_restore	%r13
-	mov	-24(%rsi),%r12
-.cfi_restore	%r12
-	mov	-16(%rsi),%rbp
-.cfi_restore	%rbp
-	mov	-8(%rsi),%rbx
-.cfi_restore	%rbx
-	lea	0(%rsi),%rsp
-.cfi_def_cfa_register	%rsp
-.Lghash_epilogue:
-	ret
-.cfi_endproc
-.size	GFp_gcm_ghash_4bit,.-GFp_gcm_ghash_4bit
 ___

 ######################################################################
@@ -1721,14 +1507,6 @@ se_handler:

 .section	.pdata
 .align	4
-	.rva	.LSEH_begin_GFp_gcm_gmult_4bit
-	.rva	.LSEH_end_GFp_gcm_gmult_4bit
-	.rva	.LSEH_info_GFp_gcm_gmult_4bit
-
-	.rva	.LSEH_begin_GFp_gcm_ghash_4bit
-	.rva	.LSEH_end_GFp_gcm_ghash_4bit
-	.rva	.LSEH_info_GFp_gcm_ghash_4bit
-
 	.rva	.LSEH_begin_GFp_gcm_init_clmul
 	.rva	.LSEH_end_GFp_gcm_init_clmul
 	.rva	.LSEH_info_GFp_gcm_init_clmul
@@ -1749,14 +1527,6 @@ ___
 $code.=<<___;
 .section	.xdata
 .align	8
-.LSEH_info_GFp_gcm_gmult_4bit:
-	.byte	9,0,0,0
-	.rva	se_handler
-	.rva	.Lgmult_prologue,.Lgmult_epilogue	# HandlerData
-.LSEH_info_GFp_gcm_ghash_4bit:
-	.byte	9,0,0,0
-	.rva	se_handler
-	.rva	.Lghash_prologue,.Lghash_epilogue	# HandlerData
 .LSEH_info_GFp_gcm_init_clmul:
 	.byte	0x01,0x08,0x03,0x00
 	.byte	0x08,0x68,0x00,0x00	#movaps	0x00(rsp),xmm6
--- a/crypto/fipsmodule/modes/gcm.c
+++ b/crypto/fipsmodule/modes/gcm.c
@@ -124,7 +124,7 @@ static void gcm_init_4bit(u128 Htable[16], const uint64_t H[2]) {
 #endif
 }

-#if defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE)
+#if defined(OPENSSL_AARCH64) || defined(OPENSSL_PPC64LE) || defined(OPENSSL_X86_64)
 static const size_t rem_4bit[16] = {
    PACK(0x0000), PACK(0x1C20), PACK(0x3840), PACK(0x2460),
    PACK(0x7080), PACK(0x6CA0), PACK(0x48C0), PACK(0x54E0),