From f6cf8bbc84b4f6c82b784a229ba360afbaea6332 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Fri, 2 Feb 2018 19:12:10 -0500 Subject: [PATCH] Sync up AES assembly. This syncs up with OpenSSL master as of 50ea9d2b3521467a11559be41dcf05ee05feabd6. The non-license non-spelling changes are CFI bits, which were added in upstream in b84460ad3a3e4fcb22efaa0a8365b826f4264ecf. Change-Id: I42280985f834d5b9133eacafc8ff9dbd2f0ea59a Reviewed-on: https://boringssl-review.googlesource.com/25704 Reviewed-by: Adam Langley --- crypto/fipsmodule/aes/asm/aes-x86_64.pl | 97 +++++++++++++++++- crypto/fipsmodule/aes/asm/aesni-x86_64.pl | 83 +++++++++++++--- crypto/fipsmodule/aes/asm/aesv8-armx.pl | 4 +- crypto/fipsmodule/aes/asm/bsaes-x86_64.pl | 116 +++++++++++++++++++++- 4 files changed, 282 insertions(+), 18 deletions(-) diff --git a/crypto/fipsmodule/aes/asm/aes-x86_64.pl b/crypto/fipsmodule/aes/asm/aes-x86_64.pl index 4bf7db3fd..8b74ef00f 100755 --- a/crypto/fipsmodule/aes/asm/aes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aes-x86_64.pl @@ -1,7 +1,14 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== -# Written by Andy Polyakov for the OpenSSL +# Written by Andy Polyakov for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. @@ -590,13 +597,21 @@ $code.=<<___; .type asm_AES_encrypt,\@function,3 .hidden asm_AES_encrypt asm_AES_encrypt: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument @@ -609,6 +624,7 @@ asm_AES_encrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Lenc_prologue: mov %rdx,$key @@ -635,20 +651,29 @@ asm_AES_encrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Lenc_epilogue: ret +.cfi_endproc .size asm_AES_encrypt,.-asm_AES_encrypt ___ @@ -1186,13 +1211,21 @@ $code.=<<___; .type asm_AES_decrypt,\@function,3 .hidden asm_AES_decrypt asm_AES_decrypt: +.cfi_startproc mov %rsp,%rax +.cfi_def_cfa_register %rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 # allocate frame "above" key schedule lea -63(%rdx),%rcx # %rdx is key argument @@ -1205,6 +1238,7 @@ asm_AES_decrypt: mov %rsi,16(%rsp) # save out mov %rax,24(%rsp) # save original stack pointer +.cfi_cfa_expression %rsp+24,deref,+8 .Ldec_prologue: mov %rdx,$key @@ -1233,20 +1267,29 
@@ asm_AES_decrypt: mov 16(%rsp),$out # restore out mov 24(%rsp),%rsi # restore saved stack pointer +.cfi_def_cfa %rsi,8 mov $s0,0($out) # write output vector mov $s1,4($out) mov $s2,8($out) mov $s3,12($out) mov -48(%rsi),%r15 +.cfi_restore %r15 mov -40(%rsi),%r14 +.cfi_restore %r14 mov -32(%rsi),%r13 +.cfi_restore %r13 mov -24(%rsi),%r12 +.cfi_restore %r12 mov -16(%rsi),%rbp +.cfi_restore %rbp mov -8(%rsi),%rbx +.cfi_restore %rbx lea (%rsi),%rsp +.cfi_def_cfa_register %rsp .Ldec_epilogue: ret +.cfi_endproc .size asm_AES_decrypt,.-asm_AES_decrypt ___ #------------------------------------------------------------------# @@ -1284,22 +1327,34 @@ $code.=<<___; .globl asm_AES_set_encrypt_key .type asm_AES_set_encrypt_key,\@function,3 asm_AES_set_encrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 # redundant, but allows to share +.cfi_push %r12 push %r13 # exception handler... +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 sub \$8,%rsp +.cfi_adjust_cfa_offset 8 .Lenc_key_prologue: call _x86_64_AES_set_encrypt_key mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Lenc_key_epilogue: ret +.cfi_endproc .size asm_AES_set_encrypt_key,.-asm_AES_set_encrypt_key .type _x86_64_AES_set_encrypt_key,\@abi-omnipotent @@ -1549,13 +1604,21 @@ $code.=<<___; .globl asm_AES_set_decrypt_key .type asm_AES_set_decrypt_key,\@function,3 asm_AES_set_decrypt_key: +.cfi_startproc push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 push %rdx # save key schedule +.cfi_adjust_cfa_offset 8 .Ldec_key_prologue: call _x86_64_AES_set_encrypt_key @@ -1609,14 +1672,22 @@ $code.=<<___; xor %rax,%rax .Labort: mov 8(%rsp),%r15 +.cfi_restore %r15 mov 16(%rsp),%r14 +.cfi_restore %r14 mov 24(%rsp),%r13 +.cfi_restore %r13 mov 32(%rsp),%r12 +.cfi_restore %r12 mov 40(%rsp),%rbp +.cfi_restore %rbp mov 48(%rsp),%rbx +.cfi_restore %rbx add \$56,%rsp +.cfi_adjust_cfa_offset -56 .Ldec_key_epilogue: ret +.cfi_endproc .size asm_AES_set_decrypt_key,.-asm_AES_set_decrypt_key ___ @@ -1645,15 +1716,23 @@ $code.=<<___; .extern OPENSSL_ia32cap_P .hidden asm_AES_cbc_encrypt asm_AES_cbc_encrypt: +.cfi_startproc cmp \$0,%rdx # check length je .Lcbc_epilogue pushfq +.cfi_push 49 # %rflags push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 .Lcbc_prologue: cld @@ -1699,8 +1778,10 @@ asm_AES_cbc_encrypt: .Lcbc_te_ok: xchg %rsp,$key +.cfi_def_cfa_register $key #add \$8,%rsp # reserve for return address! 
mov $key,$_rsp # save %rsp +.cfi_cfa_expression $_rsp,deref,+64 .Lcbc_fast_body: mov %rdi,$_inp # save copy of inp mov %rsi,$_out # save copy of out @@ -1930,7 +2011,7 @@ asm_AES_cbc_encrypt: lea ($key,%rax),%rax mov %rax,$keyend - # pick Te4 copy which can't "overlap" with stack frame or key scdedule + # pick Te4 copy which can't "overlap" with stack frame or key schedule lea 2048($sbox),$sbox lea 768-8(%rsp),%rax sub $sbox,%rax @@ -2082,17 +2163,27 @@ asm_AES_cbc_encrypt: .align 16 .Lcbc_exit: mov $_rsp,%rsi +.cfi_def_cfa %rsi,64 mov (%rsi),%r15 +.cfi_restore %r15 mov 8(%rsi),%r14 +.cfi_restore %r14 mov 16(%rsi),%r13 +.cfi_restore %r13 mov 24(%rsi),%r12 +.cfi_restore %r12 mov 32(%rsi),%rbp +.cfi_restore %rbp mov 40(%rsi),%rbx +.cfi_restore %rbx lea 48(%rsi),%rsp +.cfi_def_cfa %rsp,16 .Lcbc_popfq: popfq +.cfi_pop 49 # %rflags .Lcbc_epilogue: ret +.cfi_endproc .size asm_AES_cbc_encrypt,.-asm_AES_cbc_encrypt ___ } diff --git a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl index a9b31515b..a18694187 100644 --- a/crypto/fipsmodule/aes/asm/aesni-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/aesni-x86_64.pl @@ -60,7 +60,7 @@ # identical to CBC, because CBC-MAC is essentially CBC encrypt without # saving output. CCM CTR "stays invisible," because it's neatly # interleaved wih CBC-MAC. This provides ~30% improvement over -# "straghtforward" CCM implementation with CTR and CBC-MAC performed +# "straightforward" CCM implementation with CTR and CBC-MAC performed # disjointly. Parallelizable modes practically achieve the theoretical # limit. # @@ -143,14 +143,14 @@ # asymptotic, if it can be surpassed, isn't it? What happens there? # Rewind to CBC paragraph for the answer. Yes, out-of-order execution # magic is responsible for this. Processor overlaps not only the -# additional instructions with AES ones, but even AES instuctions +# additional instructions with AES ones, but even AES instructions # processing adjacent triplets of independent blocks. In the 6x case # additional instructions still claim disproportionally small amount # of additional cycles, but in 8x case number of instructions must be # a tad too high for out-of-order logic to cope with, and AES unit # remains underutilized... As you can see 8x interleave is hardly # justifiable, so there no need to feel bad that 32-bit aesni-x86.pl -# utilizies 6x interleave because of limited register bank capacity. +# utilizes 6x interleave because of limited register bank capacity. # # Higher interleave factors do have negative impact on Westmere # performance. 
While for ECB mode it's negligible ~1.5%, other @@ -1182,6 +1182,7 @@ $code.=<<___; .type aesni_ctr32_encrypt_blocks,\@function,5 .align 16 aesni_ctr32_encrypt_blocks: +.cfi_startproc cmp \$1,$len jne .Lctr32_bulk @@ -1204,7 +1205,9 @@ $code.=<<___; .align 16 .Lctr32_bulk: lea (%rsp),$key_ # use $key_ as frame pointer +.cfi_def_cfa_register $key_ push %rbp +.cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ @@ -1548,7 +1551,7 @@ $code.=<<___; sub \$8,$len jnc .Lctr32_loop8 # loop if $len-=8 didn't borrow - add \$8,$len # restore real remainig $len + add \$8,$len # restore real remaining $len jz .Lctr32_done # done if ($len==0) lea -0x80($key),$key @@ -1665,7 +1668,7 @@ $code.=<<___; movups $inout2,0x20($out) # $len was 3, stop store .Lctr32_done: - xorps %xmm0,%xmm0 # clear regiser bank + xorps %xmm0,%xmm0 # clear register bank xor $key0,$key0 pxor %xmm1,%xmm1 pxor %xmm2,%xmm2 @@ -1725,9 +1728,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8($key_),%rbp +.cfi_restore %rbp lea ($key_),%rsp +.cfi_def_cfa_register %rsp .Lctr32_epilogue: ret +.cfi_endproc .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks ___ } @@ -1749,8 +1755,11 @@ $code.=<<___; .type aesni_xts_encrypt,\@function,6 .align 16 aesni_xts_encrypt: +.cfi_startproc lea (%rsp),%r11 # frame pointer +.cfi_def_cfa_register %r11 push %rbp +.cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ @@ -1848,7 +1857,7 @@ $code.=<<___; lea `16*6`($inp),$inp pxor $twmask,$inout5 - pxor $twres,@tweak[0] # calclulate tweaks^round[last] + pxor $twres,@tweak[0] # calculate tweaks^round[last] aesenc $rndkey1,$inout4 pxor $twres,@tweak[1] movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^round[last] @@ -2215,9 +2224,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp +.cfi_restore %rbp lea (%r11),%rsp +.cfi_def_cfa_register %rsp .Lxts_enc_epilogue: ret +.cfi_endproc .size aesni_xts_encrypt,.-aesni_xts_encrypt ___ @@ -2226,8 +2238,11 @@ $code.=<<___; .type aesni_xts_decrypt,\@function,6 .align 16 aesni_xts_decrypt: +.cfi_startproc lea (%rsp),%r11 # frame pointer +.cfi_def_cfa_register %r11 push %rbp +.cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ @@ -2328,7 +2343,7 @@ $code.=<<___; lea `16*6`($inp),$inp pxor $twmask,$inout5 - pxor $twres,@tweak[0] # calclulate tweaks^round[last] + pxor $twres,@tweak[0] # calculate tweaks^round[last] aesdec $rndkey1,$inout4 pxor $twres,@tweak[1] movdqa @tweak[0],`16*0`(%rsp) # put aside tweaks^last round key @@ -2718,9 +2733,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp +.cfi_restore %rbp lea (%r11),%rsp +.cfi_def_cfa_register %rsp .Lxts_dec_epilogue: ret +.cfi_endproc .size aesni_xts_decrypt,.-aesni_xts_decrypt ___ } @@ -2745,12 +2763,18 @@ $code.=<<___; .type aesni_ocb_encrypt,\@function,6 .align 32 aesni_ocb_encrypt: +.cfi_startproc lea (%rsp),%rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp @@ -2945,6 +2969,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 @@ -2972,13 +2997,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp 
+.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Locb_enc_epilogue: ret +.cfi_endproc .size aesni_ocb_encrypt,.-aesni_ocb_encrypt .type __ocb_encrypt6,\@abi-omnipotent @@ -3191,12 +3223,18 @@ __ocb_encrypt1: .type aesni_ocb_decrypt,\@function,6 .align 32 aesni_ocb_decrypt: +.cfi_startproc lea (%rsp),%rax push %rbx +.cfi_push %rbx push %rbp +.cfi_push %rbp push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp),%rsp @@ -3413,6 +3451,7 @@ $code.=<<___ if (!$win64); pxor %xmm14,%xmm14 pxor %xmm15,%xmm15 lea 0x28(%rsp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x00(%rsp),%xmm6 @@ -3440,13 +3479,20 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -40(%rax),%r14 +.cfi_restore %r14 mov -32(%rax),%r13 +.cfi_restore %r13 mov -24(%rax),%r12 +.cfi_restore %r12 mov -16(%rax),%rbp +.cfi_restore %rbp mov -8(%rax),%rbx +.cfi_restore %rbx lea (%rax),%rsp +.cfi_def_cfa_register %rsp .Locb_dec_epilogue: ret +.cfi_endproc .size aesni_ocb_decrypt,.-aesni_ocb_decrypt .type __ocb_decrypt6,\@abi-omnipotent @@ -3659,6 +3705,7 @@ $code.=<<___; .type ${PREFIX}_cbc_encrypt,\@function,6 .align 16 ${PREFIX}_cbc_encrypt: +.cfi_startproc test $len,$len # check length jz .Lcbc_ret @@ -3735,7 +3782,9 @@ $code.=<<___; .align 16 .Lcbc_decrypt_bulk: lea (%rsp),%r11 # frame pointer +.cfi_def_cfa_register %r11 push %rbp +.cfi_push %rbp sub \$$frame_size,%rsp and \$-16,%rsp # Linux kernel stack can be incorrectly seeded ___ @@ -4179,9 +4228,12 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -8(%r11),%rbp +.cfi_restore %rbp lea (%r11),%rsp +.cfi_def_cfa_register %rsp .Lcbc_ret: ret +.cfi_endproc .size ${PREFIX}_cbc_encrypt,.-${PREFIX}_cbc_encrypt ___ } @@ -4202,7 +4254,9 @@ $code.=<<___; .type ${PREFIX}_set_decrypt_key,\@abi-omnipotent .align 16 ${PREFIX}_set_decrypt_key: +.cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 +.cfi_adjust_cfa_offset 8 call __aesni_set_encrypt_key shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key test %eax,%eax @@ -4235,15 +4289,16 @@ ${PREFIX}_set_decrypt_key: pxor %xmm0,%xmm0 .Ldec_key_ret: add \$8,%rsp +.cfi_adjust_cfa_offset -8 ret +.cfi_endproc .LSEH_end_set_decrypt_key: .size ${PREFIX}_set_decrypt_key,.-${PREFIX}_set_decrypt_key ___ -# This is based on submission by -# -# Huang Ying -# Vinodh Gopal +# This is based on submission from Intel by +# Huang Ying +# Vinodh Gopal # Kahraman Akdemir # # Aggressively optimized in respect to aeskeygenassist's critical path @@ -4271,7 +4326,9 @@ $code.=<<___; .align 16 ${PREFIX}_set_encrypt_key: __aesni_set_encrypt_key: +.cfi_startproc .byte 0x48,0x83,0xEC,0x08 # sub rsp,8 +.cfi_adjust_cfa_offset 8 mov \$-1,%rax test $inp,$inp jz .Lenc_key_ret @@ -4461,7 +4518,7 @@ __aesni_set_encrypt_key: .align 16 .L14rounds: - movups 16($inp),%xmm2 # remaning half of *userKey + movups 16($inp),%xmm2 # remaining half of *userKey mov \$13,$bits # 14 rounds for 256 lea 16(%rax),%rax cmp \$`1<<28`,%r10d # AVX, but no XOP @@ -4565,7 +4622,9 @@ __aesni_set_encrypt_key: pxor %xmm4,%xmm4 pxor %xmm5,%xmm5 add \$8,%rsp +.cfi_adjust_cfa_offset -8 ret +.cfi_endproc .LSEH_end_set_encrypt_key: .align 16 diff --git a/crypto/fipsmodule/aes/asm/aesv8-armx.pl b/crypto/fipsmodule/aes/asm/aesv8-armx.pl index 662814a97..2fc616e77 100644 --- a/crypto/fipsmodule/aes/asm/aesv8-armx.pl +++ b/crypto/fipsmodule/aes/asm/aesv8-armx.pl @@ -929,7 +929,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/^(\s+)v/$1/o or # strip off v prefix 
s/\bbx\s+lr\b/ret/o; - # fix up remainig legacy suffixes + # fix up remaining legacy suffixes s/\.[ui]?8//o; m/\],#8/o and s/\.16b/\.8b/go; s/\.[ui]?32//o and s/\.16b/\.4s/go; @@ -988,7 +988,7 @@ if ($flavour =~ /64/) { ######## 64-bit code s/\bv([0-9])\.[12468]+[bsd]\b/q$1/go; # new->old registers s/\/\/\s?/@ /o; # new->old style commentary - # fix up remainig new-style suffixes + # fix up remaining new-style suffixes s/\{q([0-9]+)\},\s*\[(.+)\],#8/sprintf "{d%d},[$2]!",2*$1/eo or s/\],#[0-9]+/]!/o; diff --git a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl index 8d9ee6ba9..607f8e7a0 100644 --- a/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl +++ b/crypto/fipsmodule/aes/asm/bsaes-x86_64.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2011-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + ################################################################### ### AES-128 [originally in CTR mode] ### @@ -1158,15 +1165,23 @@ $code.=<<___; .type bsaes_ecb_encrypt_blocks,\@abi-omnipotent .align 16 bsaes_ecb_encrypt_blocks: +.cfi_startproc mov %rsp, %rax .Lecb_enc_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp), %rsp @@ -1184,6 +1199,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rsp,%rbp # backup %rsp +.cfi_def_cfa_register %rbp mov 240($arg4),%eax # rounds mov $arg1,$inp # backup arguments mov $arg2,$out @@ -1328,6 +1344,7 @@ $code.=<<___; jb .Lecb_enc_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -1345,29 +1362,45 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea (%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lecb_enc_epilogue: ret +.cfi_endproc .size bsaes_ecb_encrypt_blocks,.-bsaes_ecb_encrypt_blocks .globl bsaes_ecb_decrypt_blocks .type bsaes_ecb_decrypt_blocks,\@abi-omnipotent .align 16 bsaes_ecb_decrypt_blocks: +.cfi_startproc mov %rsp, %rax .Lecb_dec_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp),%rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); lea -0xa0(%rsp), %rsp @@ -1385,6 +1418,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rsp,%rbp # backup %rsp +.cfi_def_cfa_register %rbp mov 240($arg4),%eax # rounds mov $arg1,$inp # backup arguments mov $arg2,$out @@ -1530,6 +1564,7 @@ $code.=<<___; jb .Lecb_dec_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -1547,14 +1582,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea 
(%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lecb_dec_epilogue: ret +.cfi_endproc .size bsaes_ecb_decrypt_blocks,.-bsaes_ecb_decrypt_blocks ___ } @@ -1564,6 +1607,7 @@ $code.=<<___; .type bsaes_cbc_encrypt,\@abi-omnipotent .align 16 bsaes_cbc_encrypt: +.cfi_startproc ___ $code.=<<___ if ($win64); mov 48(%rsp),$arg6 # pull direction flag @@ -1577,12 +1621,19 @@ $code.=<<___; mov %rsp, %rax .Lcbc_dec_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp), %rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); mov 0xa0(%rsp),$arg5 # pull ivp @@ -1601,6 +1652,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rsp, %rbp # backup %rsp +.cfi_def_cfa_register %rbp mov 240($arg4), %eax # rounds mov $arg1, $inp # backup arguments mov $arg2, $out @@ -1820,6 +1872,7 @@ $code.=<<___; ja .Lcbc_dec_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -1837,29 +1890,45 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea (%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lcbc_dec_epilogue: ret +.cfi_endproc .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt .globl bsaes_ctr32_encrypt_blocks .type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent .align 16 bsaes_ctr32_encrypt_blocks: +.cfi_startproc mov %rsp, %rax .Lctr_enc_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp), %rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); mov 0xa0(%rsp),$arg5 # pull ivp @@ -1878,6 +1947,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rsp, %rbp # backup %rsp +.cfi_def_cfa_register %rbp movdqu ($arg5), %xmm0 # load counter mov 240($arg4), %eax # rounds mov $arg1, $inp # backup arguments @@ -2052,6 +2122,7 @@ $code.=<<___; ja .Lctr_enc_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -2069,14 +2140,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea (%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lctr_enc_epilogue: ret +.cfi_endproc .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks ___ ###################################################################### @@ -2092,15 +2171,23 @@ $code.=<<___; .type bsaes_xts_encrypt,\@abi-omnipotent .align 16 bsaes_xts_encrypt: +.cfi_startproc mov %rsp, %rax .Lxts_enc_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp), %rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); mov 0xa0(%rsp),$arg5 # pull key2 @@ -2120,6 +2207,7 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov %rsp, %rbp # backup %rsp +.cfi_def_cfa_register %rbp mov $arg1, $inp # backup arguments mov $arg2, $out mov $arg3, $len @@ -2442,6 +2530,7 @@ $code.=<<___; ja .Lxts_enc_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa 
%rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -2459,29 +2548,45 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea (%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lxts_enc_epilogue: ret +.cfi_endproc .size bsaes_xts_encrypt,.-bsaes_xts_encrypt .globl bsaes_xts_decrypt .type bsaes_xts_decrypt,\@abi-omnipotent .align 16 bsaes_xts_decrypt: +.cfi_startproc mov %rsp, %rax .Lxts_dec_prologue: push %rbp +.cfi_push %rbp push %rbx +.cfi_push %rbx push %r12 +.cfi_push %r12 push %r13 +.cfi_push %r13 push %r14 +.cfi_push %r14 push %r15 +.cfi_push %r15 lea -0x48(%rsp), %rsp +.cfi_adjust_cfa_offset 0x48 ___ $code.=<<___ if ($win64); mov 0xa0(%rsp),$arg5 # pull key2 @@ -2849,6 +2954,7 @@ $code.=<<___; ja .Lxts_dec_bzero lea 0x78(%rbp),%rax +.cfi_def_cfa %rax,8 ___ $code.=<<___ if ($win64); movaps 0x40(%rbp), %xmm6 @@ -2866,14 +2972,22 @@ $code.=<<___ if ($win64); ___ $code.=<<___; mov -48(%rax), %r15 +.cfi_restore %r15 mov -40(%rax), %r14 +.cfi_restore %r14 mov -32(%rax), %r13 +.cfi_restore %r13 mov -24(%rax), %r12 +.cfi_restore %r12 mov -16(%rax), %rbx +.cfi_restore %rbx mov -8(%rax), %rbp +.cfi_restore %rbp lea (%rax), %rsp # restore %rsp +.cfi_def_cfa_register %rsp .Lxts_dec_epilogue: ret +.cfi_endproc .size bsaes_xts_decrypt,.-bsaes_xts_decrypt ___ }
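
Editor's note: aside from the license headers and spelling fixes, the substance of this sync is the CFI unwind annotations imported from upstream commit b84460ad3a3e4fcb22efaa0a8365b826f4264ecf. For readers unfamiliar with the idiom, below is a minimal, hypothetical hand-written example (it is not taken from the patch) of how a prologue and epilogue are typically annotated in plain GNU assembler syntax. In the perlasm sources above, shorthand such as `.cfi_push`/`.cfi_pop` is, as I understand it, expanded by the x86_64 translator script into the equivalent `.cfi_adjust_cfa_offset`/`.cfi_offset` pairs shown here; the function name and frame layout are illustrative only.

    # Hypothetical example: annotate a frame so the unwinder can recover the
    # caller's state (CFA = canonical frame address, i.e. caller's %rsp) at
    # every instruction, not just at function boundaries.
    .globl  example_fn
    .type   example_fn,@function
    example_fn:
    .cfi_startproc                  # on entry, CFA is %rsp+8 (return address)
            push    %rbp            # save a callee-saved register
    .cfi_adjust_cfa_offset 8        # %rsp moved down 8, so CFA is 8 further away
    .cfi_offset %rbp,-16            # saved %rbp lives at CFA-16
            push    %rbx
    .cfi_adjust_cfa_offset 8
    .cfi_offset %rbx,-24
            sub     $32,%rsp        # local scratch space
    .cfi_adjust_cfa_offset 32
            # ... function body ...
            add     $32,%rsp
    .cfi_adjust_cfa_offset -32
            pop     %rbx
    .cfi_adjust_cfa_offset -8
    .cfi_restore %rbx               # %rbx is back in its register
            pop     %rbp
    .cfi_adjust_cfa_offset -8
    .cfi_restore %rbp
            ret
    .cfi_endproc
    .size   example_fn,.-example_fn

As a rough check that annotations like these (or the ones emitted by this patch) are well formed, the generated `.eh_frame` data can be inspected after assembly with something like `readelf --debug-dump=frames example.o`, or exercised by unwinding through the function in a debugger.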