# Copyright (c) 2011-2012, Andy Polyakov
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#	* Redistributions of source code must retain copyright notices,
#	  this list of conditions and the following disclaimer.
#
#	* Redistributions in binary form must reproduce the above
#	  copyright notice, this list of conditions and the following
#	  disclaimer in the documentation and/or other materials
#	  provided with the distribution.
#
#	* Neither the name of the Andy Polyakov nor the names of its
#	  copyright holder and contributors may be used to endorse or
#	  promote products derived from this software without specific
#	  prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
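#
# For reference (an annotation, not part of the generated output): the
# public entry points below follow the calling conventions documented in
# the aesni-x86_64.pl perlasm source this file is generated from, i.e.
# roughly these C prototypes, where AES_KEY is OpenSSL's expanded-key
# structure whose trailing int holds the round count (hence the reads of
# 240(%key) throughout):
#
#	void aesni_[en|de]crypt(const void *inp, void *out,
#				const AES_KEY *key);
#	void aesni_ecb_encrypt(const void *inp, void *out, size_t len,
#				const AES_KEY *key, int enc);
#	void aesni_ccm64_[en|de]crypt_blocks(const void *inp, void *out,
#				size_t blocks, const AES_KEY *key,
#				const char *ivec, char *cmac);
#	void aesni_ctr32_encrypt_blocks(const void *inp, void *out,
#				size_t blocks, const AES_KEY *key,
#				const char *ivec);
#	void aesni_xts_[en|de]crypt(const char *inp, char *out, size_t len,
#				const AES_KEY *key1, const AES_KEY *key2,
#				const unsigned char iv[16]);
#	void aesni_cbc_encrypt(const void *inp, void *out, size_t length,
#				const AES_KEY *key, unsigned char *ivp,
#				int enc);
#	int aesni_set_[en|de]crypt_key(const unsigned char *userKey,
#				int bits, AES_KEY *key);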
#
# *** This file is auto-generated ***
#
.text
# One-block AES encrypt: (%rdi) -> (%rsi) with the expanded key at %rdx.
.globl _aesni_encrypt
.p2align 4
_aesni_encrypt:
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
movups 16(%rdx),%xmm1
leaq 32(%rdx),%rdx
xorps %xmm0,%xmm2
L$oop_enc1_1:
# the .byte sequences throughout encode AES-NI instructions by hand for
# assemblers without the mnemonics; this one is aesenc %xmm1,%xmm2
# (0xDC = aesenc, 0xDD = aesenclast, 0xDE = aesdec, 0xDF = aesdeclast)
.byte 102,15,56,220,209
decl %eax
movups (%rdx),%xmm1
leaq 16(%rdx),%rdx
jnz L$oop_enc1_1
.byte 102,15,56,221,209
movups %xmm2,(%rsi)
# return, encoded as rep ret (0xf3,0xc3)
.byte 0xf3,0xc3

# One-block AES decrypt, same interface as _aesni_encrypt.
.globl _aesni_decrypt
.p2align 4
_aesni_decrypt:
movups (%rdi),%xmm2
movl 240(%rdx),%eax
movups (%rdx),%xmm0
movups 16(%rdx),%xmm1
leaq 32(%rdx),%rdx
xorps %xmm0,%xmm2
L$oop_dec1_2:
.byte 102,15,56,222,209
decl %eax
movups (%rdx),%xmm1
leaq 16(%rdx),%rdx
jnz L$oop_dec1_2
.byte 102,15,56,223,209
movups %xmm2,(%rsi)
.byte 0xf3,0xc3

# The private helpers below process 3, 4, 6 or 8 blocks (%xmm2 and up) in
# parallel with the key schedule at %rcx; %eax enters as the round count
# and is halved because each loop iteration performs two rounds.
.p2align 4
_aesni_encrypt3:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
movups (%rcx),%xmm0
L$enc_loop3:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %eax
.byte 102,15,56,220,225
movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
movups (%rcx),%xmm0
jnz L$enc_loop3
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 0xf3,0xc3

.p2align 4
_aesni_decrypt3:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
movups (%rcx),%xmm0
L$dec_loop3:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %eax
.byte 102,15,56,222,225
movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
movups (%rcx),%xmm0
jnz L$dec_loop3
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 0xf3,0xc3

.p2align 4
_aesni_encrypt4:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
movups (%rcx),%xmm0
L$enc_loop4:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
movups (%rcx),%xmm0
jnz L$enc_loop4
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 0xf3,0xc3

.p2align 4
_aesni_decrypt4:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
movups (%rcx),%xmm0
L$dec_loop4:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
movups (%rcx),%xmm0
jnz L$dec_loop4
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 0xf3,0xc3

.p2align 4
_aesni_encrypt6:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
.byte 102,15,56,220,209
pxor %xmm0,%xmm4
.byte 102,15,56,220,217
pxor %xmm0,%xmm5
.byte 102,15,56,220,225
pxor %xmm0,%xmm6
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
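# note (annotation): the round-0 pxor whitening above is interleaved with
# the first aesenc of the earlier blocks to hide xor/load latency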
decl %eax
.byte 102,15,56,220,241
movups (%rcx),%xmm0
.byte 102,15,56,220,249
jmp L$enc_loop6_enter
.p2align 4
L$enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
L$enc_loop6_enter:
movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups (%rcx),%xmm0
jnz L$enc_loop6
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 0xf3,0xc3

.p2align 4
_aesni_decrypt6:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
pxor %xmm0,%xmm3
.byte 102,15,56,222,209
pxor %xmm0,%xmm4
.byte 102,15,56,222,217
pxor %xmm0,%xmm5
.byte 102,15,56,222,225
pxor %xmm0,%xmm6
.byte 102,15,56,222,233
pxor %xmm0,%xmm7
decl %eax
.byte 102,15,56,222,241
movups (%rcx),%xmm0
.byte 102,15,56,222,249
jmp L$dec_loop6_enter
.p2align 4
L$dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
L$dec_loop6_enter:
movups 16(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups (%rcx),%xmm0
jnz L$dec_loop6
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 0xf3,0xc3

.p2align 4
_aesni_encrypt8:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
.byte 102,15,56,220,209
pxor %xmm0,%xmm4
.byte 102,15,56,220,217
pxor %xmm0,%xmm5
.byte 102,15,56,220,225
pxor %xmm0,%xmm6
.byte 102,15,56,220,233
pxor %xmm0,%xmm7
decl %eax
.byte 102,15,56,220,241
pxor %xmm0,%xmm8
.byte 102,15,56,220,249
pxor %xmm0,%xmm9
movups (%rcx),%xmm0
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 16(%rcx),%xmm1
jmp L$enc_loop8_enter
.p2align 4
L$enc_loop8:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
decl %eax
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 16(%rcx),%xmm1
L$enc_loop8_enter:
.byte 102,15,56,220,208
.byte 102,15,56,220,216
leaq 32(%rcx),%rcx
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups (%rcx),%xmm0
jnz L$enc_loop8
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
.byte 102,15,56,221,208
.byte 102,15,56,221,216
.byte 102,15,56,221,224
.byte 102,15,56,221,232
.byte 102,15,56,221,240
.byte 102,15,56,221,248
.byte 102,68,15,56,221,192
.byte 102,68,15,56,221,200
.byte 0xf3,0xc3

.p2align 4
_aesni_decrypt8:
movups (%rcx),%xmm0
shrl $1,%eax
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
xorps %xmm0,%xmm3
.byte 102,15,56,222,209
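# note (annotation): same whitening/round interleave as _aesni_encrypt8,
# with aesdec (0xDE) in place of aesenc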
pxor %xmm0,%xmm4
.byte 102,15,56,222,217
pxor %xmm0,%xmm5
.byte 102,15,56,222,225
pxor %xmm0,%xmm6
.byte 102,15,56,222,233
pxor %xmm0,%xmm7
decl %eax
.byte 102,15,56,222,241
pxor %xmm0,%xmm8
.byte 102,15,56,222,249
pxor %xmm0,%xmm9
movups (%rcx),%xmm0
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 16(%rcx),%xmm1
jmp L$dec_loop8_enter
.p2align 4
L$dec_loop8:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
decl %eax
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 16(%rcx),%xmm1
L$dec_loop8_enter:
.byte 102,15,56,222,208
.byte 102,15,56,222,216
leaq 32(%rcx),%rcx
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups (%rcx),%xmm0
jnz L$dec_loop8
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
.byte 102,15,56,223,208
.byte 102,15,56,223,216
.byte 102,15,56,223,224
.byte 102,15,56,223,232
.byte 102,15,56,223,240
.byte 102,15,56,223,248
.byte 102,68,15,56,223,192
.byte 102,68,15,56,223,200
.byte 0xf3,0xc3

# ECB: 8 blocks per main-loop iteration, with a tail switch for 1-7
# remaining blocks; %r8d selects encrypt (non-zero) or decrypt (zero).
.globl _aesni_ecb_encrypt
.p2align 4
_aesni_ecb_encrypt:
andq $-16,%rdx
jz L$ecb_ret
movl 240(%rcx),%eax
movups (%rcx),%xmm0
movq %rcx,%r11
movl %eax,%r10d
testl %r8d,%r8d
jz L$ecb_decrypt
cmpq $128,%rdx
jb L$ecb_enc_tail
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
jmp L$ecb_enc_loop8_enter
.p2align 4
L$ecb_enc_loop8:
movups %xmm2,(%rsi)
movq %r11,%rcx
movdqu (%rdi),%xmm2
movl %r10d,%eax
movups %xmm3,16(%rsi)
movdqu 16(%rdi),%xmm3
movups %xmm4,32(%rsi)
movdqu 32(%rdi),%xmm4
movups %xmm5,48(%rsi)
movdqu 48(%rdi),%xmm5
movups %xmm6,64(%rsi)
movdqu 64(%rdi),%xmm6
movups %xmm7,80(%rsi)
movdqu 80(%rdi),%xmm7
movups %xmm8,96(%rsi)
movdqu 96(%rdi),%xmm8
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
L$ecb_enc_loop8_enter:
call _aesni_encrypt8
subq $128,%rdx
jnc L$ecb_enc_loop8
movups %xmm2,(%rsi)
movq %r11,%rcx
movups %xmm3,16(%rsi)
movl %r10d,%eax
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
addq $128,%rdx
jz L$ecb_ret
L$ecb_enc_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
jb L$ecb_enc_one
movups 16(%rdi),%xmm3
je L$ecb_enc_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
jb L$ecb_enc_three
movups 48(%rdi),%xmm5
je L$ecb_enc_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
jb L$ecb_enc_five
movups 80(%rdi),%xmm7
je L$ecb_enc_six
movdqu 96(%rdi),%xmm8
call _aesni_encrypt8
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_enc_one:
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_3:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_3
.byte 102,15,56,221,209
movups %xmm2,(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_enc_two:
xorps %xmm4,%xmm4
call _aesni_encrypt3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_enc_three:
call _aesni_encrypt3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
jmp L$ecb_ret
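# note (annotation): each ECB tail size reuses the widest batch helper
# that fits, zeroing any unused input registers (e.g. %xmm4 for two
# blocks, %xmm7 for five)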
.p2align 4
L$ecb_enc_four:
call _aesni_encrypt4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_enc_five:
xorps %xmm7,%xmm7
call _aesni_encrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_enc_six:
call _aesni_encrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_decrypt:
cmpq $128,%rdx
jb L$ecb_dec_tail
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $128,%rdx
jmp L$ecb_dec_loop8_enter
.p2align 4
L$ecb_dec_loop8:
movups %xmm2,(%rsi)
movq %r11,%rcx
movdqu (%rdi),%xmm2
movl %r10d,%eax
movups %xmm3,16(%rsi)
movdqu 16(%rdi),%xmm3
movups %xmm4,32(%rsi)
movdqu 32(%rdi),%xmm4
movups %xmm5,48(%rsi)
movdqu 48(%rdi),%xmm5
movups %xmm6,64(%rsi)
movdqu 64(%rdi),%xmm6
movups %xmm7,80(%rsi)
movdqu 80(%rdi),%xmm7
movups %xmm8,96(%rsi)
movdqu 96(%rdi),%xmm8
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
L$ecb_dec_loop8_enter:
call _aesni_decrypt8
movups (%r11),%xmm0
subq $128,%rdx
jnc L$ecb_dec_loop8
movups %xmm2,(%rsi)
movq %r11,%rcx
movups %xmm3,16(%rsi)
movl %r10d,%eax
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
addq $128,%rdx
jz L$ecb_ret
L$ecb_dec_tail:
movups (%rdi),%xmm2
cmpq $32,%rdx
jb L$ecb_dec_one
movups 16(%rdi),%xmm3
je L$ecb_dec_two
movups 32(%rdi),%xmm4
cmpq $64,%rdx
jb L$ecb_dec_three
movups 48(%rdi),%xmm5
je L$ecb_dec_four
movups 64(%rdi),%xmm6
cmpq $96,%rdx
jb L$ecb_dec_five
movups 80(%rdi),%xmm7
je L$ecb_dec_six
movups 96(%rdi),%xmm8
movups (%rcx),%xmm0
call _aesni_decrypt8
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_one:
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_4:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_4
.byte 102,15,56,223,209
movups %xmm2,(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_two:
xorps %xmm4,%xmm4
call _aesni_decrypt3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_three:
call _aesni_decrypt3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_four:
call _aesni_decrypt4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_five:
xorps %xmm7,%xmm7
call _aesni_decrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
jmp L$ecb_ret
.p2align 4
L$ecb_dec_six:
call _aesni_decrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
L$ecb_ret:
.byte 0xf3,0xc3

# CCM64 encrypt: CTR encryption fused with CBC-MAC; %xmm9 holds the
# counter, %xmm3 the running MAC, and L$increment64/L$bswap_mask handle
# the counter layout.
.globl _aesni_ccm64_encrypt_blocks
.p2align 4
_aesni_ccm64_encrypt_blocks:
movl 240(%rcx),%eax
movdqu (%r8),%xmm9
movdqa L$increment64(%rip),%xmm6
movdqa L$bswap_mask(%rip),%xmm7
shrl $1,%eax
leaq 0(%rcx),%r11
movdqu (%r9),%xmm3
movdqa %xmm9,%xmm2
movl %eax,%r10d
.byte 102,68,15,56,0,207
jmp L$ccm64_enc_outer
.p2align 4
L$ccm64_enc_outer:
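# note (annotation): one pass per block; the counter block in %xmm2 and
# the CBC-MAC accumulator in %xmm3 are pipelined through the same round
# loop, two aesenc per iteration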
movups (%r11),%xmm0
movl %r10d,%eax
movups (%rdi),%xmm8
xorps %xmm0,%xmm2
movups 16(%r11),%xmm1
xorps %xmm8,%xmm0
leaq 32(%r11),%rcx
xorps %xmm0,%xmm3
movups (%rcx),%xmm0
L$ccm64_enc2_loop:
.byte 102,15,56,220,209
decl %eax
.byte 102,15,56,220,217
movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
movups 0(%rcx),%xmm0
jnz L$ccm64_enc2_loop
.byte 102,15,56,220,209
.byte 102,15,56,220,217
paddq %xmm6,%xmm9
.byte 102,15,56,221,208
.byte 102,15,56,221,216
decq %rdx
leaq 16(%rdi),%rdi
xorps %xmm2,%xmm8
movdqa %xmm9,%xmm2
movups %xmm8,(%rsi)
leaq 16(%rsi),%rsi
.byte 102,15,56,0,215
jnz L$ccm64_enc_outer
movups %xmm3,(%r9)
.byte 0xf3,0xc3

# CCM64 decrypt: the MAC is computed over the recovered plaintext, so the
# counter for the first block is encrypted up front and each loop pass
# then overlaps the next counter with the MAC update.
.globl _aesni_ccm64_decrypt_blocks
.p2align 4
_aesni_ccm64_decrypt_blocks:
movl 240(%rcx),%eax
movups (%r8),%xmm9
movdqu (%r9),%xmm3
movdqa L$increment64(%rip),%xmm6
movdqa L$bswap_mask(%rip),%xmm7
movaps %xmm9,%xmm2
movl %eax,%r10d
movq %rcx,%r11
.byte 102,68,15,56,0,207
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_5:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_5
.byte 102,15,56,221,209
movups (%rdi),%xmm8
paddq %xmm6,%xmm9
leaq 16(%rdi),%rdi
jmp L$ccm64_dec_outer
.p2align 4
L$ccm64_dec_outer:
xorps %xmm2,%xmm8
movdqa %xmm9,%xmm2
movl %r10d,%eax
movups %xmm8,(%rsi)
leaq 16(%rsi),%rsi
.byte 102,15,56,0,215
subq $1,%rdx
jz L$ccm64_dec_break
movups (%r11),%xmm0
shrl $1,%eax
movups 16(%r11),%xmm1
xorps %xmm0,%xmm8
leaq 32(%r11),%rcx
xorps %xmm0,%xmm2
xorps %xmm8,%xmm3
movups (%rcx),%xmm0
L$ccm64_dec2_loop:
.byte 102,15,56,220,209
decl %eax
.byte 102,15,56,220,217
movups 16(%rcx),%xmm1
.byte 102,15,56,220,208
leaq 32(%rcx),%rcx
.byte 102,15,56,220,216
movups 0(%rcx),%xmm0
jnz L$ccm64_dec2_loop
movups (%rdi),%xmm8
paddq %xmm6,%xmm9
.byte 102,15,56,220,209
.byte 102,15,56,220,217
leaq 16(%rdi),%rdi
.byte 102,15,56,221,208
.byte 102,15,56,221,216
jmp L$ccm64_dec_outer
.p2align 4
L$ccm64_dec_break:
movups (%r11),%xmm0
movups 16(%r11),%xmm1
xorps %xmm0,%xmm8
leaq 32(%r11),%r11
xorps %xmm8,%xmm3
L$oop_enc1_6:
.byte 102,15,56,220,217
decl %eax
movups (%r11),%xmm1
leaq 16(%r11),%r11
jnz L$oop_enc1_6
.byte 102,15,56,221,217
movups %xmm3,(%r9)
.byte 0xf3,0xc3

# CTR mode with a 32-bit big-endian counter in the last word of the IV;
# eight counter blocks are kept as a template at (%rsp) and regenerated
# on the fly.
.globl _aesni_ctr32_encrypt_blocks
.p2align 4
_aesni_ctr32_encrypt_blocks:
leaq (%rsp),%rax
pushq %rbp
subq $128,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
cmpq $1,%rdx
je L$ctr32_one_shortcut
movdqu (%r8),%xmm2
movdqu (%rcx),%xmm0
movl 12(%r8),%r8d
pxor %xmm0,%xmm2
movl 12(%rcx),%r11d
movdqa %xmm2,0(%rsp)
bswapl %r8d
movdqa %xmm2,%xmm3
movdqa %xmm2,%xmm4
movdqa %xmm2,%xmm5
movdqa %xmm2,64(%rsp)
movdqa %xmm2,80(%rsp)
movdqa %xmm2,96(%rsp)
movdqa %xmm2,112(%rsp)
movl 240(%rcx),%eax
leaq 1(%r8),%r9
leaq 2(%r8),%r10
bswapl %r9d
bswapl %r10d
xorl %r11d,%r9d
xorl %r11d,%r10d
# the 102,65,15,58,34,... sequences encode pinsrd $3,%r9d/%r10d,%xmmN
# (insert the next byte-swapped counter word into lane 3)
.byte 102,65,15,58,34,217,3
leaq 3(%r8),%r9
movdqa %xmm3,16(%rsp)
.byte 102,65,15,58,34,226,3
bswapl %r9d
leaq 4(%r8),%r10
movdqa %xmm4,32(%rsp)
xorl %r11d,%r9d
bswapl %r10d
.byte 102,65,15,58,34,233,3
xorl %r11d,%r10d
movdqa %xmm5,48(%rsp)
leaq 5(%r8),%r9
movl %r10d,64+12(%rsp)
bswapl %r9d
leaq 6(%r8),%r10
xorl %r11d,%r9d
bswapl %r10d
movl %r9d,80+12(%rsp)
xorl %r11d,%r10d
leaq 7(%r8),%r9
movl %r10d,96+12(%rsp)
bswapl %r9d
xorl %r11d,%r9d
movl %r9d,112+12(%rsp)
movups 16(%rcx),%xmm1
movdqa 64(%rsp),%xmm6
movdqa 80(%rsp),%xmm7
cmpq $8,%rdx
jb L$ctr32_tail
leaq 128(%rcx),%rcx
subq $8,%rdx
jmp L$ctr32_loop8
.p2align 5
L$ctr32_loop8:
addl $8,%r8d
movdqa 96(%rsp),%xmm8
.byte 102,15,56,220,209
movl %r8d,%r9d
movdqa 112(%rsp),%xmm9
.byte 102,15,56,220,217
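# note (annotation): while the current batch of eight counter blocks runs
# its AES rounds, the byte-swapped counters for the next batch are
# computed on the integer side and parked back into the stack template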
bswapl %r9d
movups 32-128(%rcx),%xmm0
.byte 102,15,56,220,225
xorl %r11d,%r9d
.byte 102,15,56,220,233
movl %r9d,0+12(%rsp)
leaq 1(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 48-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
bswapl %r9d
.byte 102,15,56,220,224
xorl %r11d,%r9d
.byte 102,15,56,220,232
movl %r9d,16+12(%rsp)
leaq 2(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 64-128(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
bswapl %r9d
.byte 102,15,56,220,225
xorl %r11d,%r9d
.byte 102,15,56,220,233
movl %r9d,32+12(%rsp)
leaq 3(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 80-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
bswapl %r9d
.byte 102,15,56,220,224
xorl %r11d,%r9d
.byte 102,15,56,220,232
movl %r9d,48+12(%rsp)
leaq 4(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 96-128(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
bswapl %r9d
.byte 102,15,56,220,225
xorl %r11d,%r9d
.byte 102,15,56,220,233
movl %r9d,64+12(%rsp)
leaq 5(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 112-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
bswapl %r9d
.byte 102,15,56,220,224
xorl %r11d,%r9d
.byte 102,15,56,220,232
movl %r9d,80+12(%rsp)
leaq 6(%r8),%r9
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 128-128(%rcx),%xmm0
.byte 102,15,56,220,209
.byte 102,15,56,220,217
bswapl %r9d
.byte 102,15,56,220,225
xorl %r11d,%r9d
.byte 102,15,56,220,233
movl %r9d,96+12(%rsp)
leaq 7(%r8),%r9
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 144-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
bswapl %r9d
.byte 102,15,56,220,224
xorl %r11d,%r9d
.byte 102,15,56,220,232
movl %r9d,112+12(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
movdqu 0(%rdi),%xmm10
.byte 102,68,15,56,220,200
movups 160-128(%rcx),%xmm0
cmpl $11,%eax
jb L$ctr32_enc_done
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 176-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 192-128(%rcx),%xmm0
je L$ctr32_enc_done
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movups 208-128(%rcx),%xmm1
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
.byte 102,68,15,56,220,192
.byte 102,68,15,56,220,200
movups 224-128(%rcx),%xmm0
L$ctr32_enc_done:
movdqu 16(%rdi),%xmm11
pxor %xmm0,%xmm10
movdqu 32(%rdi),%xmm12
pxor %xmm0,%xmm11
movdqu 48(%rdi),%xmm13
pxor %xmm0,%xmm12
movdqu 64(%rdi),%xmm14
pxor %xmm0,%xmm13
movdqu 80(%rdi),%xmm15
pxor %xmm0,%xmm14
.byte 102,15,56,220,209
pxor %xmm0,%xmm15
.byte 102,15,56,220,217
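# note (annotation): the last round key in %xmm0 has been pre-XORed into
# the plaintext blocks above, so the aesenclast below produces ciphertext
# directly (aesenclast's key operand doubles as the data XOR)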
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
.byte 102,68,15,56,220,193
.byte 102,68,15,56,220,201
movdqu 96(%rdi),%xmm1
.byte 102,65,15,56,221,210
pxor %xmm0,%xmm1
movdqu 112(%rdi),%xmm10
leaq 128(%rdi),%rdi
.byte 102,65,15,56,221,219
pxor %xmm0,%xmm10
movdqa 0(%rsp),%xmm11
.byte 102,65,15,56,221,228
movdqa 16(%rsp),%xmm12
.byte 102,65,15,56,221,237
movdqa 32(%rsp),%xmm13
.byte 102,65,15,56,221,246
movdqa 48(%rsp),%xmm14
.byte 102,65,15,56,221,255
movdqa 64(%rsp),%xmm15
.byte 102,68,15,56,221,193
movdqa 80(%rsp),%xmm0
.byte 102,69,15,56,221,202
movups 16-128(%rcx),%xmm1
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm0,%xmm7
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
subq $8,%rdx
jnc L$ctr32_loop8
addq $8,%rdx
jz L$ctr32_done
leaq -128(%rcx),%rcx
L$ctr32_tail:
leaq 16(%rcx),%rcx
cmpq $4,%rdx
jb L$ctr32_loop3
je L$ctr32_loop4
movdqa 96(%rsp),%xmm8
pxor %xmm9,%xmm9
movups 16(%rcx),%xmm0
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
.byte 102,15,56,220,217
shrl $1,%eax
.byte 102,15,56,220,225
decl %eax
.byte 102,15,56,220,233
movups (%rdi),%xmm10
.byte 102,15,56,220,241
movups 16(%rdi),%xmm11
.byte 102,15,56,220,249
movups 32(%rdi),%xmm12
.byte 102,68,15,56,220,193
movups 16(%rcx),%xmm1
call L$enc_loop8_enter
movdqu 48(%rdi),%xmm13
pxor %xmm10,%xmm2
movdqu 64(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm10,%xmm6
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
cmpq $6,%rdx
jb L$ctr32_done
movups 80(%rdi),%xmm11
xorps %xmm11,%xmm7
movups %xmm7,80(%rsi)
je L$ctr32_done
movups 96(%rdi),%xmm12
xorps %xmm12,%xmm8
movups %xmm8,96(%rsi)
jmp L$ctr32_done
.p2align 5
L$ctr32_loop4:
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movups (%rcx),%xmm1
decl %eax
jnz L$ctr32_loop4
.byte 102,15,56,221,209
movups (%rdi),%xmm10
.byte 102,15,56,221,217
movups 16(%rdi),%xmm11
.byte 102,15,56,221,225
movups 32(%rdi),%xmm12
.byte 102,15,56,221,233
movups 48(%rdi),%xmm13
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm4,32(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm5,48(%rsi)
jmp L$ctr32_done
.p2align 5
L$ctr32_loop3:
.byte 102,15,56,220,209
leaq 16(%rcx),%rcx
.byte 102,15,56,220,217
.byte 102,15,56,220,225
movups (%rcx),%xmm1
decl %eax
jnz L$ctr32_loop3
.byte 102,15,56,221,209
.byte 102,15,56,221,217
.byte 102,15,56,221,225
movups (%rdi),%xmm10
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
cmpq $2,%rdx
jb L$ctr32_done
movups 16(%rdi),%xmm11
xorps %xmm11,%xmm3
movups %xmm3,16(%rsi)
je L$ctr32_done
movups 32(%rdi),%xmm12
xorps %xmm12,%xmm4
movups %xmm4,32(%rsi)
jmp L$ctr32_done
.p2align 4
L$ctr32_one_shortcut:
movups (%r8),%xmm2
movups (%rdi),%xmm10
movl 240(%rcx),%eax
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_7:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_7
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
jmp L$ctr32_done
.p2align 4
L$ctr32_done:
leaq (%rbp),%rsp
popq %rbp
L$ctr32_epilogue:
.byte 0xf3,0xc3

# XTS encrypt: the initial tweak is formed by encrypting the IV at (%r9)
# with key2 (%r8); data is then processed with key1 (%rcx), six blocks
# per grand-loop pass.
.globl _aesni_xts_encrypt
.p2align 4
_aesni_xts_encrypt:
leaq (%rsp),%rax
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
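# note (annotation): the 112-byte aligned frame caches key-masked tweak
# values for the six blocks of a grand-loop pass at 0..80(%rsp); they are
# consumed directly as aesenclast memory operands (the .byte ...,84,36,...
# encodings below)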
movups (%r9),%xmm15
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm15
L$oop_enc1_8:
.byte 102,68,15,56,220,249
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz L$oop_enc1_8
.byte 102,68,15,56,221,249
movups (%rcx),%xmm0
movq %rcx,%r11
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
andq $-16,%rdx
movups 16(%rcx,%r10,1),%xmm1
movl %eax,%r10d
movdqa L$xts_magic(%rip),%xmm8
pshufd $95,%xmm15,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)
subq $96,%rdx
jc L$xts_enc_short
shrl $1,%eax
subl $3,%eax
movups 16(%r11),%xmm1
movl %eax,%r10d
leaq L$xts_magic(%rip),%r8
jmp L$xts_enc_grandloop
.p2align 5
L$xts_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,220,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,220,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,220,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,220,233
movups 32(%r11),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7
pxor %xmm9,%xmm10
.byte 102,15,56,220,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
movups 48(%r11),%xmm1
.byte 102,15,56,220,208
pxor %xmm9,%xmm12
movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216
pxor %xmm9,%xmm13
movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224
pxor %xmm9,%xmm14
.byte 102,15,56,220,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
movdqa %xmm8,80(%rsp)
.byte 102,15,56,220,248
movups 64(%r11),%xmm0
leaq 64(%r11),%rcx
pshufd $95,%xmm15,%xmm9
jmp L$xts_enc_loop6
.p2align 5
L$xts_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups (%rcx),%xmm0
decl %eax
jnz L$xts_enc_loop6
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,220,217
pand %xmm8,%xmm14
movups (%r11),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm14,%xmm15
.byte 102,15,56,220,241
movaps %xmm10,%xmm11
.byte 102,15,56,220,249
movups 16(%rcx),%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,208
pxor %xmm15,%xmm10
psrad $31,%xmm14
.byte 102,15,56,220,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pxor %xmm14,%xmm15
.byte 102,15,56,220,240
movaps %xmm11,%xmm12
.byte 102,15,56,220,248
movups 32(%rcx),%xmm0
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
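# note (annotation): GF(2^128) tweak update; paddq doubles the 128-bit
# tweak while psrad $31 + pand against L$xts_magic turn the carried-out
# top bit into the reduction constant 0x87, which pxor folds back in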
pand %xmm8,%xmm14
.byte 102,15,56,220,225
movdqa %xmm13,48(%rsp)
.byte 102,15,56,220,233
pxor %xmm14,%xmm15
.byte 102,15,56,220,241
movaps %xmm12,%xmm13
.byte 102,15,56,220,249
movups 48(%rcx),%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,220,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pxor %xmm14,%xmm15
.byte 102,15,56,220,240
movaps %xmm13,%xmm14
.byte 102,15,56,220,248
movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm0,%xmm15
movups (%r11),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups 16(%r11),%xmm1
pxor %xmm15,%xmm14
psrad $31,%xmm9
.byte 102,15,56,221,84,36,0
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
.byte 102,15,56,221,92,36,16
.byte 102,15,56,221,100,36,32
pxor %xmm9,%xmm15
.byte 102,15,56,221,108,36,48
.byte 102,15,56,221,116,36,64
.byte 102,15,56,221,124,36,80
movl %r10d,%eax
leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc L$xts_enc_grandloop
leal 7(%rax,%rax,1),%eax
movq %r11,%rcx
movl %eax,%r10d
L$xts_enc_short:
pxor %xmm0,%xmm10
addq $96,%rdx
jz L$xts_enc_done
pxor %xmm0,%xmm11
cmpq $32,%rdx
jb L$xts_enc_one
pxor %xmm0,%xmm12
je L$xts_enc_two
pxor %xmm0,%xmm13
cmpq $64,%rdx
jb L$xts_enc_three
pxor %xmm0,%xmm14
je L$xts_enc_four
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
call _aesni_encrypt6
xorps %xmm10,%xmm2
movdqa %xmm15,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
jmp L$xts_enc_done
.p2align 4
L$xts_enc_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_9:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_9
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
jmp L$xts_enc_done
.p2align 4
L$xts_enc_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
call _aesni_encrypt3
xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp L$xts_enc_done
.p2align 4
L$xts_enc_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
call _aesni_encrypt3
xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp L$xts_enc_done
.p2align 4
L$xts_enc_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5
call _aesni_encrypt4
pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
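# note (annotation): a trailing partial block, if any, is handled by the
# ciphertext-stealing loop at L$xts_enc_steal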
jmp L$xts_enc_done
.p2align 4
L$xts_enc_done:
andq $15,%r9
jz L$xts_enc_ret
movq %r9,%rdx
L$xts_enc_steal:
movzbl (%rdi),%eax
movzbl -16(%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,-16(%rsi)
movb %cl,0(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz L$xts_enc_steal
subq %r9,%rsi
movq %r11,%rcx
movl %r10d,%eax
movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_enc1_10:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_10
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movups %xmm2,-16(%rsi)
L$xts_enc_ret:
leaq (%rbp),%rsp
popq %rbp
L$xts_enc_epilogue:
.byte 0xf3,0xc3

# XTS decrypt: same structure as _aesni_xts_encrypt, except that a
# trailing partial block forces the last full block to be held back for
# ciphertext stealing (the setnz/shlq length adjustment below).
.globl _aesni_xts_decrypt
.p2align 4
_aesni_xts_decrypt:
leaq (%rsp),%rax
pushq %rbp
subq $112,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
movups (%r9),%xmm15
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm15
L$oop_enc1_11:
.byte 102,68,15,56,220,249
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz L$oop_enc1_11
.byte 102,68,15,56,221,249
xorl %eax,%eax
testq $15,%rdx
setnz %al
shlq $4,%rax
subq %rax,%rdx
movups (%rcx),%xmm0
movq %rcx,%r11
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
andq $-16,%rdx
movups 16(%rcx,%r10,1),%xmm1
movl %eax,%r10d
movdqa L$xts_magic(%rip),%xmm8
pshufd $95,%xmm15,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)
subq $96,%rdx
jc L$xts_dec_short
shrl $1,%eax
subl $3,%eax
movups 16(%r11),%xmm1
movl %eax,%r10d
leaq L$xts_magic(%rip),%r8
jmp L$xts_dec_grandloop
.p2align 5
L$xts_dec_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,222,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,222,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,222,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,222,233
movups 32(%r11),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7
pxor %xmm9,%xmm10
.byte 102,15,56,222,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
movups 48(%r11),%xmm1
.byte 102,15,56,222,208
pxor %xmm9,%xmm12
movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,216
pxor %xmm9,%xmm13
movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,224
pxor %xmm9,%xmm14
.byte 102,15,56,222,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
movdqa %xmm8,80(%rsp)
.byte 102,15,56,222,248
movups 64(%r11),%xmm0
leaq 64(%r11),%rcx
pshufd $95,%xmm15,%xmm9
jmp L$xts_dec_loop6
.p2align 5
L$xts_dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups (%rcx),%xmm0
decl %eax
jnz L$xts_dec_loop6
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,222,217
pand %xmm8,%xmm14
movups (%r11),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm14,%xmm15
.byte 102,15,56,222,241
movaps %xmm10,%xmm11
.byte 102,15,56,222,249
movups 16(%rcx),%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,208
pxor %xmm15,%xmm10
psrad $31,%xmm14
.byte 102,15,56,222,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pxor %xmm14,%xmm15
.byte 102,15,56,222,240
movaps %xmm11,%xmm12
.byte 102,15,56,222,248
movups 32(%rcx),%xmm0
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,225
movdqa %xmm13,48(%rsp)
.byte 102,15,56,222,233
pxor %xmm14,%xmm15
.byte 102,15,56,222,241
movaps %xmm12,%xmm13
.byte 102,15,56,222,249
movups 48(%rcx),%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,222,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pxor %xmm14,%xmm15
.byte 102,15,56,222,240
movaps %xmm13,%xmm14
.byte 102,15,56,222,248
movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm15
movups (%r11),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups 16(%r11),%xmm1
pxor %xmm15,%xmm14
psrad $31,%xmm9
.byte 102,15,56,223,84,36,0
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
.byte 102,15,56,223,92,36,16
.byte 102,15,56,223,100,36,32
pxor %xmm9,%xmm15
.byte 102,15,56,223,108,36,48
.byte 102,15,56,223,116,36,64
.byte 102,15,56,223,124,36,80
movl %r10d,%eax
leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc L$xts_dec_grandloop
leal 7(%rax,%rax,1),%eax
movq %r11,%rcx
movl %eax,%r10d
L$xts_dec_short:
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
addq $96,%rdx
jz L$xts_dec_done
pxor %xmm0,%xmm12
cmpq $32,%rdx
jb L$xts_dec_one
pxor %xmm0,%xmm13
je L$xts_dec_two
pxor %xmm0,%xmm14
cmpq $64,%rdx
jb L$xts_dec_three
je L$xts_dec_four
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
call _aesni_decrypt6
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm14
movdqu %xmm5,48(%rsi)
pcmpgtd %xmm15,%xmm14
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $19,%xmm14,%xmm11
andq $15,%r9
jz L$xts_dec_ret
movdqa %xmm15,%xmm10
paddq %xmm15,%xmm15
pand %xmm8,%xmm11
pxor %xmm15,%xmm11
jmp L$xts_dec_done2
.p2align 4
L$xts_dec_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_12:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_12
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
movdqa %xmm12,%xmm11
leaq 16(%rsi),%rsi
jmp L$xts_dec_done
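# note (annotation): unlike encryption, decrypt tails must carry two
# tweaks forward: %xmm11 whitens the last full block and %xmm10 the
# stolen partial block, so each case shifts the tweak queue down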
.p2align 4
L$xts_dec_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
call _aesni_decrypt3
xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm13,%xmm11
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp L$xts_dec_done
.p2align 4
L$xts_dec_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
call _aesni_decrypt3
xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm14,%xmm11
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp L$xts_dec_done
.p2align 4
L$xts_dec_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5
call _aesni_decrypt4
pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqa %xmm15,%xmm11
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp L$xts_dec_done
.p2align 4
L$xts_dec_done:
andq $15,%r9
jz L$xts_dec_ret
L$xts_dec_done2:
movq %r9,%rdx
movq %r11,%rcx
movl %r10d,%eax
movups (%rdi),%xmm2
xorps %xmm11,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_13:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_13
.byte 102,15,56,223,209
xorps %xmm11,%xmm2
movups %xmm2,(%rsi)
L$xts_dec_steal:
movzbl 16(%rdi),%eax
movzbl (%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,(%rsi)
movb %cl,16(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz L$xts_dec_steal
subq %r9,%rsi
movq %r11,%rcx
movl %r10d,%eax
movups (%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_14:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_14
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)
L$xts_dec_ret:
leaq (%rbp),%rsp
popq %rbp
L$xts_dec_epilogue:
.byte 0xf3,0xc3

# CBC: encryption is inherently serial (one block at a time); decryption
# is parallel, up to eight blocks per pass.
.globl _aesni_cbc_encrypt
.p2align 4
_aesni_cbc_encrypt:
testq %rdx,%rdx
jz L$cbc_ret
movl 240(%rcx),%r10d
movq %rcx,%r11
testl %r9d,%r9d
jz L$cbc_decrypt
movups (%r8),%xmm2
movl %r10d,%eax
cmpq $16,%rdx
jb L$cbc_enc_tail
subq $16,%rdx
jmp L$cbc_enc_loop
.p2align 4
L$cbc_enc_loop:
movups (%rdi),%xmm3
leaq 16(%rdi),%rdi
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm3
leaq 32(%rcx),%rcx
xorps %xmm3,%xmm2
L$oop_enc1_15:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_enc1_15
.byte 102,15,56,221,209
movl %r10d,%eax
movq %r11,%rcx
movups %xmm2,0(%rsi)
leaq 16(%rsi),%rsi
subq $16,%rdx
jnc L$cbc_enc_loop
addq $16,%rdx
jnz L$cbc_enc_tail
movups %xmm2,(%r8)
jmp L$cbc_ret
L$cbc_enc_tail:
movq %rdx,%rcx
xchgq %rdi,%rsi
# rep movsb followed by a two-byte nop, emitted as raw bytes
.long 0x9066A4F3
movl $16,%ecx
subq %rdx,%rcx
xorl %eax,%eax
# rep stosb followed by a two-byte nop (zero-pads the partial block)
.long 0x9066AAF3
leaq -16(%rdi),%rdi
movl %r10d,%eax
movq %rdi,%rsi
movq %r11,%rcx
xorq %rdx,%rdx
jmp L$cbc_enc_loop
.p2align 4
L$cbc_decrypt:
leaq (%rsp),%rax
pushq %rbp
subq $16,%rsp
andq $-16,%rsp
leaq -8(%rax),%rbp
movups (%r8),%xmm10
movl %r10d,%eax
cmpq $80,%rdx
jbe L$cbc_dec_tail
movups (%rcx),%xmm0
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqa %xmm2,%xmm11
movdqu 32(%rdi),%xmm4
movdqa %xmm3,%xmm12
movdqu 48(%rdi),%xmm5
movdqa %xmm4,%xmm13
movdqu 64(%rdi),%xmm6
movdqa %xmm5,%xmm14
movdqu 80(%rdi),%xmm7
movdqa %xmm6,%xmm15
cmpq $112,%rdx
jbe L$cbc_dec_six_or_seven
subq $112,%rdx
leaq 112(%rcx),%rcx
jmp L$cbc_dec_loop8_enter
.p2align 4
L$cbc_dec_loop8:
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
L$cbc_dec_loop8_enter:
movdqu 96(%rdi),%xmm8
pxor %xmm0,%xmm2
movdqu 112(%rdi),%xmm9
pxor %xmm0,%xmm3
movups 16-112(%rcx),%xmm1
pxor %xmm0,%xmm4
xorq %r11,%r11
cmpq $112,%rdx
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8
.byte 102,15,56,222,209
pxor %xmm0,%xmm9
movups 32-112(%rcx),%xmm0
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
setnc %r11b
.byte 102,68,15,56,222,193
shlq $7,%r11
.byte 102,68,15,56,222,201
addq %rdi,%r11
movups 48-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 64-112(%rcx),%xmm0
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 80-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 96-112(%rcx),%xmm0
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 112-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 128-112(%rcx),%xmm0
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 144-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 160-112(%rcx),%xmm0
cmpl $11,%eax
jb L$cbc_dec_done
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 176-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 192-112(%rcx),%xmm0
je L$cbc_dec_done
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movups 208-112(%rcx),%xmm1
.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
.byte 102,68,15,56,222,192
.byte 102,68,15,56,222,200
movups 224-112(%rcx),%xmm0
L$cbc_dec_done:
.byte 102,15,56,222,209
pxor %xmm0,%xmm10
.byte 102,15,56,222,217
pxor %xmm0,%xmm11
.byte 102,15,56,222,225
pxor %xmm0,%xmm12
.byte 102,15,56,222,233
pxor %xmm0,%xmm13
.byte 102,15,56,222,241
pxor %xmm0,%xmm14
.byte 102,15,56,222,249
pxor %xmm0,%xmm15
.byte 102,68,15,56,222,193
.byte 102,68,15,56,222,201
movdqu 80(%rdi),%xmm1
.byte 102,65,15,56,223,210
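# note (annotation): aesdeclast XORs its key operand in, and each of
# %xmm10..%xmm15 holds previous-ciphertext ^ rndkey[last], so these last
# rounds emit plaintext with the CBC chaining already applied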
movdqu 96(%rdi),%xmm10
pxor %xmm0,%xmm1
.byte 102,65,15,56,223,219
pxor %xmm0,%xmm10
movdqu 112(%rdi),%xmm0
leaq 128(%rdi),%rdi
.byte 102,65,15,56,223,228
movdqu 0(%r11),%xmm11
.byte 102,65,15,56,223,237
movdqu 16(%r11),%xmm12
.byte 102,65,15,56,223,246
movdqu 32(%r11),%xmm13
.byte 102,65,15,56,223,255
movdqu 48(%r11),%xmm14
.byte 102,68,15,56,223,193
movdqu 64(%r11),%xmm15
.byte 102,69,15,56,223,202
movdqa %xmm0,%xmm10
movdqu 80(%r11),%xmm1
movups -112(%rcx),%xmm0
movups %xmm2,(%rsi)
movdqa %xmm11,%xmm2
movups %xmm3,16(%rsi)
movdqa %xmm12,%xmm3
movups %xmm4,32(%rsi)
movdqa %xmm13,%xmm4
movups %xmm5,48(%rsi)
movdqa %xmm14,%xmm5
movups %xmm6,64(%rsi)
movdqa %xmm15,%xmm6
movups %xmm7,80(%rsi)
movdqa %xmm1,%xmm7
movups %xmm8,96(%rsi)
leaq 112(%rsi),%rsi
subq $128,%rdx
ja L$cbc_dec_loop8
movaps %xmm9,%xmm2
leaq -112(%rcx),%rcx
addq $112,%rdx
jle L$cbc_dec_tail_collected
movups %xmm9,(%rsi)
leaq 16(%rsi),%rsi
cmpq $80,%rdx
jbe L$cbc_dec_tail
movaps %xmm11,%xmm2
L$cbc_dec_six_or_seven:
cmpq $96,%rdx
ja L$cbc_dec_seven
movaps %xmm7,%xmm8
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm8,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
movdqa %xmm7,%xmm2
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_seven:
movups 96(%rdi),%xmm8
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups 80(%rdi),%xmm9
pxor %xmm10,%xmm2
movups 96(%rdi),%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
pxor %xmm15,%xmm7
movdqu %xmm6,64(%rsi)
pxor %xmm9,%xmm8
movdqu %xmm7,80(%rsi)
leaq 96(%rsi),%rsi
movdqa %xmm8,%xmm2
jmp L$cbc_dec_tail_collected
L$cbc_dec_tail:
movups (%rdi),%xmm2
subq $16,%rdx
jbe L$cbc_dec_one
movups 16(%rdi),%xmm3
movaps %xmm2,%xmm11
subq $16,%rdx
jbe L$cbc_dec_two
movups 32(%rdi),%xmm4
movaps %xmm3,%xmm12
subq $16,%rdx
jbe L$cbc_dec_three
movups 48(%rdi),%xmm5
movaps %xmm4,%xmm13
subq $16,%rdx
jbe L$cbc_dec_four
movups 64(%rdi),%xmm6
movaps %xmm5,%xmm14
movaps %xmm6,%xmm15
xorps %xmm7,%xmm7
call _aesni_decrypt6
pxor %xmm10,%xmm2
movaps %xmm15,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm6
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
movdqa %xmm6,%xmm2
subq $16,%rdx
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_one:
movaps %xmm2,%xmm11
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
L$oop_dec1_16:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz L$oop_dec1_16
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movaps %xmm11,%xmm10
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_two:
movaps %xmm3,%xmm12
xorps %xmm4,%xmm4
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm12,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
movdqa %xmm3,%xmm2
leaq 16(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_three:
movaps %xmm4,%xmm13
call _aesni_decrypt3
pxor %xmm10,%xmm2
movaps %xmm13,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
movdqa %xmm4,%xmm2
leaq 32(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_four:
movaps %xmm5,%xmm14
call _aesni_decrypt4
pxor %xmm10,%xmm2
movaps %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqu %xmm2,(%rsi)
pxor %xmm12,%xmm4
movdqu %xmm3,16(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm4,32(%rsi)
movdqa %xmm5,%xmm2
leaq 48(%rsi),%rsi
jmp L$cbc_dec_tail_collected
.p2align 4
L$cbc_dec_tail_collected:
movups %xmm10,(%r8)
andq $15,%rdx
jnz L$cbc_dec_tail_partial
movups %xmm2,(%rsi)
jmp L$cbc_dec_ret
.p2align 4
L$cbc_dec_tail_partial:
movaps %xmm2,(%rsp)
movq $16,%rcx
movq %rsi,%rdi
subq %rdx,%rcx
leaq (%rsp),%rsi
# rep movsb followed by a two-byte nop, emitted as raw bytes
.long 0x9066A4F3
L$cbc_dec_ret:
leaq (%rbp),%rsp
popq %rbp
L$cbc_ret:
.byte 0xf3,0xc3

# Build the decryption schedule: expand the encrypt schedule, then
# reverse the round-key order and run aesimc over the inner keys
# (Equivalent Inverse Cipher).
.globl _aesni_set_decrypt_key
.p2align 4
_aesni_set_decrypt_key:
# subq $8,%rsp, emitted as raw bytes
.byte 0x48,0x83,0xEC,0x08
call __aesni_set_encrypt_key
shll $4,%esi
testl %eax,%eax
jnz L$dec_key_ret
leaq 16(%rdx,%rsi,1),%rdi
movups (%rdx),%xmm0
movups (%rdi),%xmm1
movups %xmm0,(%rdi)
movups %xmm1,(%rdx)
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
L$dec_key_inverse:
movups (%rdx),%xmm0
movups (%rdi),%xmm1
# aesimc %xmm0,%xmm0 / aesimc %xmm1,%xmm1
.byte 102,15,56,219,192
.byte 102,15,56,219,201
leaq 16(%rdx),%rdx
leaq -16(%rdi),%rdi
movups %xmm0,16(%rdi)
movups %xmm1,-16(%rdx)
cmpq %rdx,%rdi
ja L$dec_key_inverse
movups (%rdx),%xmm0
.byte 102,15,56,219,192
movups %xmm0,(%rdi)
L$dec_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
L$SEH_end_set_decrypt_key:

# Expand userKey (%rdi) of %esi bits into the AES_KEY at %rdx using
# aeskeygenassist (the 102,15,58,223,... encodings below).
.globl _aesni_set_encrypt_key
.p2align 4
_aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte 0x48,0x83,0xEC,0x08
movq $-1,%rax
testq %rdi,%rdi
jz L$enc_key_ret
testq %rdx,%rdx
jz L$enc_key_ret
movups (%rdi),%xmm0
xorps %xmm4,%xmm4
leaq 16(%rdx),%rax
cmpl $256,%esi
je L$14rounds
cmpl $192,%esi
je L$12rounds
cmpl $128,%esi
jne L$bad_keybits
L$10rounds:
movl $9,%esi
movups %xmm0,(%rdx)
.byte 102,15,58,223,200,1
call L$key_expansion_128_cold
.byte 102,15,58,223,200,2
call L$key_expansion_128
.byte 102,15,58,223,200,4
call L$key_expansion_128
.byte 102,15,58,223,200,8
call L$key_expansion_128
.byte 102,15,58,223,200,16
call L$key_expansion_128
.byte 102,15,58,223,200,32
call L$key_expansion_128
.byte 102,15,58,223,200,64
call L$key_expansion_128
.byte 102,15,58,223,200,128
call L$key_expansion_128
.byte 102,15,58,223,200,27
call L$key_expansion_128
.byte 102,15,58,223,200,54
call L$key_expansion_128
movups %xmm0,(%rax)
movl %esi,80(%rax)
xorl %eax,%eax
jmp L$enc_key_ret
.p2align 4
L$12rounds:
movq 16(%rdi),%xmm2
movl $11,%esi
movups %xmm0,(%rdx)
.byte 102,15,58,223,202,1
call L$key_expansion_192a_cold
.byte 102,15,58,223,202,2
call L$key_expansion_192b
.byte 102,15,58,223,202,4
call L$key_expansion_192a
.byte 102,15,58,223,202,8
call L$key_expansion_192b
.byte 102,15,58,223,202,16
call L$key_expansion_192a
.byte 102,15,58,223,202,32
call L$key_expansion_192b
.byte 102,15,58,223,202,64
call L$key_expansion_192a
.byte 102,15,58,223,202,128
call L$key_expansion_192b
movups %xmm0,(%rax)
movl %esi,48(%rax)
xorq %rax,%rax
jmp L$enc_key_ret
.p2align 4
L$14rounds:
movups 16(%rdi),%xmm2
movl $13,%esi
leaq 16(%rax),%rax
movups %xmm0,(%rdx)
movups %xmm2,16(%rdx)
.byte 102,15,58,223,202,1
call L$key_expansion_256a_cold
.byte 102,15,58,223,200,1
call L$key_expansion_256b
.byte 102,15,58,223,202,2
call L$key_expansion_256a
.byte 102,15,58,223,200,2
call L$key_expansion_256b
.byte 102,15,58,223,202,4
call L$key_expansion_256a
.byte 102,15,58,223,200,4
call L$key_expansion_256b
.byte 102,15,58,223,202,8
call L$key_expansion_256a
.byte 102,15,58,223,200,8
call L$key_expansion_256b
.byte 102,15,58,223,202,16
call L$key_expansion_256a
.byte 102,15,58,223,200,16
call L$key_expansion_256b
.byte 102,15,58,223,202,32
call L$key_expansion_256a
.byte 102,15,58,223,200,32
call L$key_expansion_256b
.byte 102,15,58,223,202,64
call L$key_expansion_256a
movups %xmm0,(%rax)
movl %esi,16(%rax)
xorq %rax,%rax
jmp L$enc_key_ret
.p2align 4
L$bad_keybits:
movq $-2,%rax
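# note (annotation): return codes for the key-setup entry points:
# 0 = success, -1 = NULL key/output pointer, -2 = unsupported key size
# (only 128/192/256 bits are accepted)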
L$enc_key_ret:
addq $8,%rsp
.byte 0xf3,0xc3
L$SEH_end_set_encrypt_key:

# Round-key expansion helpers: each consumes an aeskeygenassist result in
# %xmm1 and advances the output pointer in %rax.
.p2align 4
L$key_expansion_128:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_128_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
.p2align 4
L$key_expansion_192a:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
L$key_expansion_192a_cold:
movaps %xmm2,%xmm5
L$key_expansion_192b_warm:
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
.byte 0xf3,0xc3
.p2align 4
L$key_expansion_192b:
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%rax)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%rax)
leaq 32(%rax),%rax
jmp L$key_expansion_192b_warm
.p2align 4
L$key_expansion_256a:
movups %xmm2,(%rax)
leaq 16(%rax),%rax
L$key_expansion_256a_cold:
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
.byte 0xf3,0xc3
.p2align 4
L$key_expansion_256b:
movups %xmm0,(%rax)
leaq 16(%rax),%rax
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
.byte 0xf3,0xc3

# Constants: byte-swap mask, counter increments, and the XTS reduction
# constant 0x87.
.p2align 6
L$bswap_mask:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long 6,6,6,0
L$increment64:
.long 1,0,0,0
L$xts_magic:
.long 0x87,0,1,0
L$increment1:
.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
# ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align 6
.section .note.GNU-stack,"",%progbits