# Utilize the optimized SHA functions in Padlock HMAC.
# [gnutls:gnutls.git] / lib / accelerated / x86 / coff / padlock-x86-64-coff.s
1 # Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
2 # All rights reserved.
3 #
4 # Redistribution and use in source and binary forms, with or without
5 # modification, are permitted provided that the following conditions
6 # are met:
7
8 #     * Redistributions of source code must retain copyright notices,
9 #      this list of conditions and the following disclaimer.
10 #
11 #     * Redistributions in binary form must reproduce the above
12 #      copyright notice, this list of conditions and the following
13 #      disclaimer in the documentation and/or other materials
14 #      provided with the distribution.
15 #
16 #     * Neither the name of the Andy Polyakov nor the names of its
17 #      copyright holder and contributors may be used to endorse or
18 #      promote products derived from this software without specific
19 #      prior written permission.
20 #
21 # ALTERNATIVELY, provided that this notice is retained in full, this
22 # product may be distributed under the terms of the GNU General Public
23 # License (GPL), in which case the provisions of the GPL apply INSTEAD OF
24 # those given above.
25 #
26 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
27 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 #
38 # *** This file is auto-generated ***
39 #
40 .text   
41 .globl  padlock_capability
42 .def    padlock_capability;     .scl 2; .type 32;       .endef
43 .p2align        4
44 padlock_capability:
45         movq    %rbx,%r8
46         xorl    %eax,%eax
47         cpuid
48         xorl    %eax,%eax
49         cmpl    $1953391939,%ebx
50         jne     .Lnoluck
51         cmpl    $1215460705,%edx
52         jne     .Lnoluck
53         cmpl    $1936487777,%ecx
54         jne     .Lnoluck
55         movl    $3221225472,%eax
56         cpuid
57         movl    %eax,%edx
58         xorl    %eax,%eax
59         cmpl    $3221225473,%edx
60         jb      .Lnoluck
61         movl    $3221225473,%eax
62         cpuid
63         movl    %edx,%eax
64         andl    $4294967279,%eax
65         orl     $16,%eax
66 .Lnoluck:
67         movq    %r8,%rbx
68         .byte   0xf3,0xc3
69
70
71 .globl  padlock_key_bswap
72 .def    padlock_key_bswap;      .scl 2; .type 32;       .endef
73 .p2align        4
74 padlock_key_bswap:
75         movl    240(%rcx),%edx
76 .Lbswap_loop:
77         movl    (%rcx),%eax
78         bswapl  %eax
79         movl    %eax,(%rcx)
80         leaq    4(%rcx),%rcx
81         subl    $1,%edx
82         jnz     .Lbswap_loop
83         .byte   0xf3,0xc3
84
85
86 .globl  padlock_verify_context
87 .def    padlock_verify_context; .scl 2; .type 32;       .endef
88 .p2align        4
89 padlock_verify_context:
90         movq    %rcx,%rdx
91         pushf
92         leaq    .Lpadlock_saved_context(%rip),%rax
93         call    _padlock_verify_ctx
94         leaq    8(%rsp),%rsp
95         .byte   0xf3,0xc3
96
97
98 .def    _padlock_verify_ctx;    .scl 3; .type 32;       .endef
99 .p2align        4
100 _padlock_verify_ctx:
101         movq    8(%rsp),%r8
102         btq     $30,%r8
103         jnc     .Lverified
104         cmpq    (%rax),%rdx
105         je      .Lverified
106         pushf
107         popf
108 .Lverified:
109         movq    %rdx,(%rax)
110         .byte   0xf3,0xc3
111
112
113 .globl  padlock_reload_key
114 .def    padlock_reload_key;     .scl 2; .type 32;       .endef
115 .p2align        4
116 padlock_reload_key:
117         pushf
118         popf
119         .byte   0xf3,0xc3
120
121
122 .globl  padlock_aes_block
123 .def    padlock_aes_block;      .scl 2; .type 32;       .endef
124 .p2align        4
125 padlock_aes_block:
126         movq    %rdi,8(%rsp)
127         movq    %rsi,16(%rsp)
128         movq    %rsp,%rax
129 .LSEH_begin_padlock_aes_block:
130         movq    %rcx,%rdi
131         movq    %rdx,%rsi
132         movq    %r8,%rdx
133
134         movq    %rbx,%r8
135         movq    $1,%rcx
136         leaq    32(%rdx),%rbx
137         leaq    16(%rdx),%rdx
138 .byte   0xf3,0x0f,0xa7,0xc8     
139         movq    %r8,%rbx
140         movq    8(%rsp),%rdi
141         movq    16(%rsp),%rsi
142         .byte   0xf3,0xc3
143 .LSEH_end_padlock_aes_block:
144
145 .globl  padlock_xstore
146 .def    padlock_xstore; .scl 2; .type 32;       .endef
147 .p2align        4
148 padlock_xstore:
149         movq    %rdi,8(%rsp)
150         movq    %rsi,16(%rsp)
151         movq    %rsp,%rax
152 .LSEH_begin_padlock_xstore:
153         movq    %rcx,%rdi
154         movq    %rdx,%rsi
155
156         movl    %esi,%edx
157 .byte   0x0f,0xa7,0xc0          
158         movq    8(%rsp),%rdi
159         movq    16(%rsp),%rsi
160         .byte   0xf3,0xc3
161 .LSEH_end_padlock_xstore:
162
163 .globl  padlock_sha1_oneshot
164 .def    padlock_sha1_oneshot;   .scl 2; .type 32;       .endef
165 .p2align        4
166 padlock_sha1_oneshot:
167         movq    %rdi,8(%rsp)
168         movq    %rsi,16(%rsp)
169         movq    %rsp,%rax
170 .LSEH_begin_padlock_sha1_oneshot:
171         movq    %rcx,%rdi
172         movq    %rdx,%rsi
173         movq    %r8,%rdx
174
175         movq    %rdx,%rcx
176         movq    %rdi,%rdx
177         movups  (%rdi),%xmm0
178         subq    $128+8,%rsp
179         movl    16(%rdi),%eax
180         movaps  %xmm0,(%rsp)
181         movq    %rsp,%rdi
182         movl    %eax,16(%rsp)
183         xorq    %rax,%rax
184 .byte   0xf3,0x0f,0xa6,0xc8     
185         movaps  (%rsp),%xmm0
186         movl    16(%rsp),%eax
187         addq    $128+8,%rsp
188         movups  %xmm0,(%rdx)
189         movl    %eax,16(%rdx)
190         movq    8(%rsp),%rdi
191         movq    16(%rsp),%rsi
192         .byte   0xf3,0xc3
193 .LSEH_end_padlock_sha1_oneshot:
194
195 .globl  padlock_sha1_blocks
196 .def    padlock_sha1_blocks;    .scl 2; .type 32;       .endef
197 .p2align        4
198 padlock_sha1_blocks:
199         movq    %rdi,8(%rsp)
200         movq    %rsi,16(%rsp)
201         movq    %rsp,%rax
202 .LSEH_begin_padlock_sha1_blocks:
203         movq    %rcx,%rdi
204         movq    %rdx,%rsi
205         movq    %r8,%rdx
206
207         movq    %rdx,%rcx
208         movq    %rdi,%rdx
209         movups  (%rdi),%xmm0
210         subq    $128+8,%rsp
211         movl    16(%rdi),%eax
212         movaps  %xmm0,(%rsp)
213         movq    %rsp,%rdi
214         movl    %eax,16(%rsp)
215         movq    $-1,%rax
216 .byte   0xf3,0x0f,0xa6,0xc8     
217         movaps  (%rsp),%xmm0
218         movl    16(%rsp),%eax
219         addq    $128+8,%rsp
220         movups  %xmm0,(%rdx)
221         movl    %eax,16(%rdx)
222         movq    8(%rsp),%rdi
223         movq    16(%rsp),%rsi
224         .byte   0xf3,0xc3
225 .LSEH_end_padlock_sha1_blocks:
226
227 .globl  padlock_sha256_oneshot
228 .def    padlock_sha256_oneshot; .scl 2; .type 32;       .endef
229 .p2align        4
230 padlock_sha256_oneshot:
231         movq    %rdi,8(%rsp)
232         movq    %rsi,16(%rsp)
233         movq    %rsp,%rax
234 .LSEH_begin_padlock_sha256_oneshot:
235         movq    %rcx,%rdi
236         movq    %rdx,%rsi
237         movq    %r8,%rdx
238
239         movq    %rdx,%rcx
240         movq    %rdi,%rdx
241         movups  (%rdi),%xmm0
242         subq    $128+8,%rsp
243         movups  16(%rdi),%xmm1
244         movaps  %xmm0,(%rsp)
245         movq    %rsp,%rdi
246         movaps  %xmm1,16(%rsp)
247         xorq    %rax,%rax
248 .byte   0xf3,0x0f,0xa6,0xd0     
249         movaps  (%rsp),%xmm0
250         movaps  16(%rsp),%xmm1
251         addq    $128+8,%rsp
252         movups  %xmm0,(%rdx)
253         movups  %xmm1,16(%rdx)
254         movq    8(%rsp),%rdi
255         movq    16(%rsp),%rsi
256         .byte   0xf3,0xc3
257 .LSEH_end_padlock_sha256_oneshot:
258
259 .globl  padlock_sha256_blocks
260 .def    padlock_sha256_blocks;  .scl 2; .type 32;       .endef
261 .p2align        4
262 padlock_sha256_blocks:
263         movq    %rdi,8(%rsp)
264         movq    %rsi,16(%rsp)
265         movq    %rsp,%rax
266 .LSEH_begin_padlock_sha256_blocks:
267         movq    %rcx,%rdi
268         movq    %rdx,%rsi
269         movq    %r8,%rdx
270
271         movq    %rdx,%rcx
272         movq    %rdi,%rdx
273         movups  (%rdi),%xmm0
274         subq    $128+8,%rsp
275         movups  16(%rdi),%xmm1
276         movaps  %xmm0,(%rsp)
277         movq    %rsp,%rdi
278         movaps  %xmm1,16(%rsp)
279         movq    $-1,%rax
280 .byte   0xf3,0x0f,0xa6,0xd0     
281         movaps  (%rsp),%xmm0
282         movaps  16(%rsp),%xmm1
283         addq    $128+8,%rsp
284         movups  %xmm0,(%rdx)
285         movups  %xmm1,16(%rdx)
286         movq    8(%rsp),%rdi
287         movq    16(%rsp),%rsi
288         .byte   0xf3,0xc3
289 .LSEH_end_padlock_sha256_blocks:
290
291 .globl  padlock_sha512_blocks
292 .def    padlock_sha512_blocks;  .scl 2; .type 32;       .endef
293 .p2align        4
294 padlock_sha512_blocks:
295         movq    %rdi,8(%rsp)
296         movq    %rsi,16(%rsp)
297         movq    %rsp,%rax
298 .LSEH_begin_padlock_sha512_blocks:
299         movq    %rcx,%rdi
300         movq    %rdx,%rsi
301         movq    %r8,%rdx
302
303         movq    %rdx,%rcx
304         movq    %rdi,%rdx
305         movups  (%rdi),%xmm0
306         subq    $128+8,%rsp
307         movups  16(%rdi),%xmm1
308         movups  32(%rdi),%xmm2
309         movups  48(%rdi),%xmm3
310         movaps  %xmm0,(%rsp)
311         movq    %rsp,%rdi
312         movaps  %xmm1,16(%rsp)
313         movaps  %xmm2,32(%rsp)
314         movaps  %xmm3,48(%rsp)
315 .byte   0xf3,0x0f,0xa6,0xe0     
316         movaps  (%rsp),%xmm0
317         movaps  16(%rsp),%xmm1
318         movaps  32(%rsp),%xmm2
319         movaps  48(%rsp),%xmm3
320         addq    $128+8,%rsp
321         movups  %xmm0,(%rdx)
322         movups  %xmm1,16(%rdx)
323         movups  %xmm2,32(%rdx)
324         movups  %xmm3,48(%rdx)
325         movq    8(%rsp),%rdi
326         movq    16(%rsp),%rsi
327         .byte   0xf3,0xc3
328 .LSEH_end_padlock_sha512_blocks:
329 .globl  padlock_ecb_encrypt
330 .def    padlock_ecb_encrypt;    .scl 2; .type 32;       .endef
331 .p2align        4
332 padlock_ecb_encrypt:
333         movq    %rdi,8(%rsp)
334         movq    %rsi,16(%rsp)
335         movq    %rsp,%rax
336 .LSEH_begin_padlock_ecb_encrypt:
337         movq    %rcx,%rdi
338         movq    %rdx,%rsi
339         movq    %r8,%rdx
340         movq    %r9,%rcx
341
342         pushq   %rbp
343         pushq   %rbx
344
345         xorl    %eax,%eax
346         testq   $15,%rdx
347         jnz     .Lecb_abort
348         testq   $15,%rcx
349         jnz     .Lecb_abort
350         leaq    .Lpadlock_saved_context(%rip),%rax
351         pushf
352         cld
353         call    _padlock_verify_ctx
354         leaq    16(%rdx),%rdx
355         xorl    %eax,%eax
356         xorl    %ebx,%ebx
357         testl   $32,(%rdx)
358         jnz     .Lecb_aligned
359         testq   $15,%rdi
360         setz    %al
361         testq   $15,%rsi
362         setz    %bl
363         testl   %ebx,%eax
364         jnz     .Lecb_aligned
365         negq    %rax
366         movq    $512,%rbx
367         notq    %rax
368         leaq    (%rsp),%rbp
369         cmpq    %rbx,%rcx
370         cmovcq  %rcx,%rbx
371         andq    %rbx,%rax
372         movq    %rcx,%rbx
373         negq    %rax
374         andq    $512-1,%rbx
375         leaq    (%rax,%rbp,1),%rsp
376         movq    $512,%rax
377         cmovzq  %rax,%rbx
378         cmpq    %rbx,%rcx
379         ja      .Lecb_loop
380         movq    %rsi,%rax
381         cmpq    %rsp,%rbp
382         cmoveq  %rdi,%rax
383         addq    %rcx,%rax
384         negq    %rax
385         andq    $4095,%rax
386         cmpq    $128,%rax
387         movq    $-128,%rax
388         cmovaeq %rbx,%rax
389         andq    %rax,%rbx
390         jz      .Lecb_unaligned_tail
391         jmp     .Lecb_loop
392 .p2align        4
393 .Lecb_loop:
394         cmpq    %rcx,%rbx
395         cmovaq  %rcx,%rbx
396         movq    %rdi,%r8
397         movq    %rsi,%r9
398         movq    %rcx,%r10
399         movq    %rbx,%rcx
400         movq    %rbx,%r11
401         testq   $15,%rdi
402         cmovnzq %rsp,%rdi
403         testq   $15,%rsi
404         jz      .Lecb_inp_aligned
405         shrq    $3,%rcx
406 .byte   0xf3,0x48,0xa5          
407         subq    %rbx,%rdi
408         movq    %rbx,%rcx
409         movq    %rdi,%rsi
410 .Lecb_inp_aligned:
411         leaq    -16(%rdx),%rax
412         leaq    16(%rdx),%rbx
413         shrq    $4,%rcx
414 .byte   0xf3,0x0f,0xa7,200      
415         movq    %r8,%rdi
416         movq    %r11,%rbx
417         testq   $15,%rdi
418         jz      .Lecb_out_aligned
419         movq    %rbx,%rcx
420         leaq    (%rsp),%rsi
421         shrq    $3,%rcx
422 .byte   0xf3,0x48,0xa5          
423         subq    %rbx,%rdi
424 .Lecb_out_aligned:
425         movq    %r9,%rsi
426         movq    %r10,%rcx
427         addq    %rbx,%rdi
428         addq    %rbx,%rsi
429         subq    %rbx,%rcx
430         movq    $512,%rbx
431         jz      .Lecb_break
432         cmpq    %rbx,%rcx
433         jae     .Lecb_loop
434 .Lecb_unaligned_tail:
435         xorl    %eax,%eax
436         cmpq    %rsp,%rbp
437         cmoveq  %rcx,%rax
438         movq    %rdi,%r8
439         movq    %rcx,%rbx
440         subq    %rax,%rsp
441         shrq    $3,%rcx
442         leaq    (%rsp),%rdi
443 .byte   0xf3,0x48,0xa5          
444         movq    %rsp,%rsi
445         movq    %r8,%rdi
446         movq    %rbx,%rcx
447         jmp     .Lecb_loop
448 .p2align        4
449 .Lecb_break:
450         cmpq    %rbp,%rsp
451         je      .Lecb_done
452
453         pxor    %xmm0,%xmm0
454         leaq    (%rsp),%rax
455 .Lecb_bzero:
456         movaps  %xmm0,(%rax)
457         leaq    16(%rax),%rax
458         cmpq    %rax,%rbp
459         ja      .Lecb_bzero
460
461 .Lecb_done:
462         leaq    (%rbp),%rsp
463         jmp     .Lecb_exit
464
465 .p2align        4
466 .Lecb_aligned:
467         leaq    (%rsi,%rcx,1),%rbp
468         negq    %rbp
469         andq    $4095,%rbp
470         xorl    %eax,%eax
471         cmpq    $128,%rbp
472         movq    $128-1,%rbp
473         cmovaeq %rax,%rbp
474         andq    %rcx,%rbp
475         subq    %rbp,%rcx
476         jz      .Lecb_aligned_tail
477         leaq    -16(%rdx),%rax
478         leaq    16(%rdx),%rbx
479         shrq    $4,%rcx
480 .byte   0xf3,0x0f,0xa7,200      
481         testq   %rbp,%rbp
482         jz      .Lecb_exit
483
484 .Lecb_aligned_tail:
485         movq    %rdi,%r8
486         movq    %rbp,%rbx
487         movq    %rbp,%rcx
488         leaq    (%rsp),%rbp
489         subq    %rcx,%rsp
490         shrq    $3,%rcx
491         leaq    (%rsp),%rdi
492 .byte   0xf3,0x48,0xa5          
493         leaq    (%r8),%rdi
494         leaq    (%rsp),%rsi
495         movq    %rbx,%rcx
496         jmp     .Lecb_loop
497 .Lecb_exit:
498         movl    $1,%eax
499         leaq    8(%rsp),%rsp
500 .Lecb_abort:
501         popq    %rbx
502         popq    %rbp
503         movq    8(%rsp),%rdi
504         movq    16(%rsp),%rsi
505         .byte   0xf3,0xc3
506 .LSEH_end_padlock_ecb_encrypt:
507 .globl  padlock_cbc_encrypt
508 .def    padlock_cbc_encrypt;    .scl 2; .type 32;       .endef
509 .p2align        4
510 padlock_cbc_encrypt:
511         movq    %rdi,8(%rsp)
512         movq    %rsi,16(%rsp)
513         movq    %rsp,%rax
514 .LSEH_begin_padlock_cbc_encrypt:
515         movq    %rcx,%rdi
516         movq    %rdx,%rsi
517         movq    %r8,%rdx
518         movq    %r9,%rcx
519
520         pushq   %rbp
521         pushq   %rbx
522
523         xorl    %eax,%eax
524         testq   $15,%rdx
525         jnz     .Lcbc_abort
526         testq   $15,%rcx
527         jnz     .Lcbc_abort
528         leaq    .Lpadlock_saved_context(%rip),%rax
529         pushf
530         cld
531         call    _padlock_verify_ctx
532         leaq    16(%rdx),%rdx
533         xorl    %eax,%eax
534         xorl    %ebx,%ebx
535         testl   $32,(%rdx)
536         jnz     .Lcbc_aligned
537         testq   $15,%rdi
538         setz    %al
539         testq   $15,%rsi
540         setz    %bl
541         testl   %ebx,%eax
542         jnz     .Lcbc_aligned
543         negq    %rax
544         movq    $512,%rbx
545         notq    %rax
546         leaq    (%rsp),%rbp
547         cmpq    %rbx,%rcx
548         cmovcq  %rcx,%rbx
549         andq    %rbx,%rax
550         movq    %rcx,%rbx
551         negq    %rax
552         andq    $512-1,%rbx
553         leaq    (%rax,%rbp,1),%rsp
554         movq    $512,%rax
555         cmovzq  %rax,%rbx
556         cmpq    %rbx,%rcx
557         ja      .Lcbc_loop
558         movq    %rsi,%rax
559         cmpq    %rsp,%rbp
560         cmoveq  %rdi,%rax
561         addq    %rcx,%rax
562         negq    %rax
563         andq    $4095,%rax
564         cmpq    $64,%rax
565         movq    $-64,%rax
566         cmovaeq %rbx,%rax
567         andq    %rax,%rbx
568         jz      .Lcbc_unaligned_tail
569         jmp     .Lcbc_loop
570 .p2align        4
571 .Lcbc_loop:
572         cmpq    %rcx,%rbx
573         cmovaq  %rcx,%rbx
574         movq    %rdi,%r8
575         movq    %rsi,%r9
576         movq    %rcx,%r10
577         movq    %rbx,%rcx
578         movq    %rbx,%r11
579         testq   $15,%rdi
580         cmovnzq %rsp,%rdi
581         testq   $15,%rsi
582         jz      .Lcbc_inp_aligned
583         shrq    $3,%rcx
584 .byte   0xf3,0x48,0xa5          
585         subq    %rbx,%rdi
586         movq    %rbx,%rcx
587         movq    %rdi,%rsi
588 .Lcbc_inp_aligned:
589         leaq    -16(%rdx),%rax
590         leaq    16(%rdx),%rbx
591         shrq    $4,%rcx
592 .byte   0xf3,0x0f,0xa7,208      
593         movdqa  (%rax),%xmm0
594         movdqa  %xmm0,-16(%rdx)
595         movq    %r8,%rdi
596         movq    %r11,%rbx
597         testq   $15,%rdi
598         jz      .Lcbc_out_aligned
599         movq    %rbx,%rcx
600         leaq    (%rsp),%rsi
601         shrq    $3,%rcx
602 .byte   0xf3,0x48,0xa5          
603         subq    %rbx,%rdi
604 .Lcbc_out_aligned:
605         movq    %r9,%rsi
606         movq    %r10,%rcx
607         addq    %rbx,%rdi
608         addq    %rbx,%rsi
609         subq    %rbx,%rcx
610         movq    $512,%rbx
611         jz      .Lcbc_break
612         cmpq    %rbx,%rcx
613         jae     .Lcbc_loop
614 .Lcbc_unaligned_tail:
615         xorl    %eax,%eax
616         cmpq    %rsp,%rbp
617         cmoveq  %rcx,%rax
618         movq    %rdi,%r8
619         movq    %rcx,%rbx
620         subq    %rax,%rsp
621         shrq    $3,%rcx
622         leaq    (%rsp),%rdi
623 .byte   0xf3,0x48,0xa5          
624         movq    %rsp,%rsi
625         movq    %r8,%rdi
626         movq    %rbx,%rcx
627         jmp     .Lcbc_loop
628 .p2align        4
629 .Lcbc_break:
630         cmpq    %rbp,%rsp
631         je      .Lcbc_done
632
633         pxor    %xmm0,%xmm0
634         leaq    (%rsp),%rax
635 .Lcbc_bzero:
636         movaps  %xmm0,(%rax)
637         leaq    16(%rax),%rax
638         cmpq    %rax,%rbp
639         ja      .Lcbc_bzero
640
641 .Lcbc_done:
642         leaq    (%rbp),%rsp
643         jmp     .Lcbc_exit
644
645 .p2align        4
646 .Lcbc_aligned:
647         leaq    (%rsi,%rcx,1),%rbp
648         negq    %rbp
649         andq    $4095,%rbp
650         xorl    %eax,%eax
651         cmpq    $64,%rbp
652         movq    $64-1,%rbp
653         cmovaeq %rax,%rbp
654         andq    %rcx,%rbp
655         subq    %rbp,%rcx
656         jz      .Lcbc_aligned_tail
657         leaq    -16(%rdx),%rax
658         leaq    16(%rdx),%rbx
659         shrq    $4,%rcx
660 .byte   0xf3,0x0f,0xa7,208      
661         movdqa  (%rax),%xmm0
662         movdqa  %xmm0,-16(%rdx)
663         testq   %rbp,%rbp
664         jz      .Lcbc_exit
665
666 .Lcbc_aligned_tail:
667         movq    %rdi,%r8
668         movq    %rbp,%rbx
669         movq    %rbp,%rcx
670         leaq    (%rsp),%rbp
671         subq    %rcx,%rsp
672         shrq    $3,%rcx
673         leaq    (%rsp),%rdi
674 .byte   0xf3,0x48,0xa5          
675         leaq    (%r8),%rdi
676         leaq    (%rsp),%rsi
677         movq    %rbx,%rcx
678         jmp     .Lcbc_loop
679 .Lcbc_exit:
680         movl    $1,%eax
681         leaq    8(%rsp),%rsp
682 .Lcbc_abort:
683         popq    %rbx
684         popq    %rbp
685         movq    8(%rsp),%rdi
686         movq    16(%rsp),%rsi
687         .byte   0xf3,0xc3
688 .LSEH_end_padlock_cbc_encrypt:
689 .globl  padlock_cfb_encrypt
690 .def    padlock_cfb_encrypt;    .scl 2; .type 32;       .endef
691 .p2align        4
692 padlock_cfb_encrypt:
693         movq    %rdi,8(%rsp)
694         movq    %rsi,16(%rsp)
695         movq    %rsp,%rax
696 .LSEH_begin_padlock_cfb_encrypt:
697         movq    %rcx,%rdi
698         movq    %rdx,%rsi
699         movq    %r8,%rdx
700         movq    %r9,%rcx
701
702         pushq   %rbp
703         pushq   %rbx
704
705         xorl    %eax,%eax
706         testq   $15,%rdx
707         jnz     .Lcfb_abort
708         testq   $15,%rcx
709         jnz     .Lcfb_abort
710         leaq    .Lpadlock_saved_context(%rip),%rax
711         pushf
712         cld
713         call    _padlock_verify_ctx
714         leaq    16(%rdx),%rdx
715         xorl    %eax,%eax
716         xorl    %ebx,%ebx
717         testl   $32,(%rdx)
718         jnz     .Lcfb_aligned
719         testq   $15,%rdi
720         setz    %al
721         testq   $15,%rsi
722         setz    %bl
723         testl   %ebx,%eax
724         jnz     .Lcfb_aligned
725         negq    %rax
726         movq    $512,%rbx
727         notq    %rax
728         leaq    (%rsp),%rbp
729         cmpq    %rbx,%rcx
730         cmovcq  %rcx,%rbx
731         andq    %rbx,%rax
732         movq    %rcx,%rbx
733         negq    %rax
734         andq    $512-1,%rbx
735         leaq    (%rax,%rbp,1),%rsp
736         movq    $512,%rax
737         cmovzq  %rax,%rbx
738         jmp     .Lcfb_loop
739 .p2align        4
740 .Lcfb_loop:
741         cmpq    %rcx,%rbx
742         cmovaq  %rcx,%rbx
743         movq    %rdi,%r8
744         movq    %rsi,%r9
745         movq    %rcx,%r10
746         movq    %rbx,%rcx
747         movq    %rbx,%r11
748         testq   $15,%rdi
749         cmovnzq %rsp,%rdi
750         testq   $15,%rsi
751         jz      .Lcfb_inp_aligned
752         shrq    $3,%rcx
753 .byte   0xf3,0x48,0xa5          
754         subq    %rbx,%rdi
755         movq    %rbx,%rcx
756         movq    %rdi,%rsi
757 .Lcfb_inp_aligned:
758         leaq    -16(%rdx),%rax
759         leaq    16(%rdx),%rbx
760         shrq    $4,%rcx
761 .byte   0xf3,0x0f,0xa7,224      
762         movdqa  (%rax),%xmm0
763         movdqa  %xmm0,-16(%rdx)
764         movq    %r8,%rdi
765         movq    %r11,%rbx
766         testq   $15,%rdi
767         jz      .Lcfb_out_aligned
768         movq    %rbx,%rcx
769         leaq    (%rsp),%rsi
770         shrq    $3,%rcx
771 .byte   0xf3,0x48,0xa5          
772         subq    %rbx,%rdi
773 .Lcfb_out_aligned:
774         movq    %r9,%rsi
775         movq    %r10,%rcx
776         addq    %rbx,%rdi
777         addq    %rbx,%rsi
778         subq    %rbx,%rcx
779         movq    $512,%rbx
780         jnz     .Lcfb_loop
781         cmpq    %rbp,%rsp
782         je      .Lcfb_done
783
784         pxor    %xmm0,%xmm0
785         leaq    (%rsp),%rax
786 .Lcfb_bzero:
787         movaps  %xmm0,(%rax)
788         leaq    16(%rax),%rax
789         cmpq    %rax,%rbp
790         ja      .Lcfb_bzero
791
792 .Lcfb_done:
793         leaq    (%rbp),%rsp
794         jmp     .Lcfb_exit
795
796 .p2align        4
797 .Lcfb_aligned:
798         leaq    -16(%rdx),%rax
799         leaq    16(%rdx),%rbx
800         shrq    $4,%rcx
801 .byte   0xf3,0x0f,0xa7,224      
802         movdqa  (%rax),%xmm0
803         movdqa  %xmm0,-16(%rdx)
804 .Lcfb_exit:
805         movl    $1,%eax
806         leaq    8(%rsp),%rsp
807 .Lcfb_abort:
808         popq    %rbx
809         popq    %rbp
810         movq    8(%rsp),%rdi
811         movq    16(%rsp),%rsi
812         .byte   0xf3,0xc3
813 .LSEH_end_padlock_cfb_encrypt:
814 .globl  padlock_ofb_encrypt
815 .def    padlock_ofb_encrypt;    .scl 2; .type 32;       .endef
816 .p2align        4
817 padlock_ofb_encrypt:
818         movq    %rdi,8(%rsp)
819         movq    %rsi,16(%rsp)
820         movq    %rsp,%rax
821 .LSEH_begin_padlock_ofb_encrypt:
822         movq    %rcx,%rdi
823         movq    %rdx,%rsi
824         movq    %r8,%rdx
825         movq    %r9,%rcx
826
827         pushq   %rbp
828         pushq   %rbx
829
830         xorl    %eax,%eax
831         testq   $15,%rdx
832         jnz     .Lofb_abort
833         testq   $15,%rcx
834         jnz     .Lofb_abort
835         leaq    .Lpadlock_saved_context(%rip),%rax
836         pushf
837         cld
838         call    _padlock_verify_ctx
839         leaq    16(%rdx),%rdx
840         xorl    %eax,%eax
841         xorl    %ebx,%ebx
842         testl   $32,(%rdx)
843         jnz     .Lofb_aligned
844         testq   $15,%rdi
845         setz    %al
846         testq   $15,%rsi
847         setz    %bl
848         testl   %ebx,%eax
849         jnz     .Lofb_aligned
850         negq    %rax
851         movq    $512,%rbx
852         notq    %rax
853         leaq    (%rsp),%rbp
854         cmpq    %rbx,%rcx
855         cmovcq  %rcx,%rbx
856         andq    %rbx,%rax
857         movq    %rcx,%rbx
858         negq    %rax
859         andq    $512-1,%rbx
860         leaq    (%rax,%rbp,1),%rsp
861         movq    $512,%rax
862         cmovzq  %rax,%rbx
863         jmp     .Lofb_loop
864 .p2align        4
865 .Lofb_loop:
866         cmpq    %rcx,%rbx
867         cmovaq  %rcx,%rbx
868         movq    %rdi,%r8
869         movq    %rsi,%r9
870         movq    %rcx,%r10
871         movq    %rbx,%rcx
872         movq    %rbx,%r11
873         testq   $15,%rdi
874         cmovnzq %rsp,%rdi
875         testq   $15,%rsi
876         jz      .Lofb_inp_aligned
877         shrq    $3,%rcx
878 .byte   0xf3,0x48,0xa5          
879         subq    %rbx,%rdi
880         movq    %rbx,%rcx
881         movq    %rdi,%rsi
882 .Lofb_inp_aligned:
883         leaq    -16(%rdx),%rax
884         leaq    16(%rdx),%rbx
885         shrq    $4,%rcx
886 .byte   0xf3,0x0f,0xa7,232      
887         movdqa  (%rax),%xmm0
888         movdqa  %xmm0,-16(%rdx)
889         movq    %r8,%rdi
890         movq    %r11,%rbx
891         testq   $15,%rdi
892         jz      .Lofb_out_aligned
893         movq    %rbx,%rcx
894         leaq    (%rsp),%rsi
895         shrq    $3,%rcx
896 .byte   0xf3,0x48,0xa5          
897         subq    %rbx,%rdi
898 .Lofb_out_aligned:
899         movq    %r9,%rsi
900         movq    %r10,%rcx
901         addq    %rbx,%rdi
902         addq    %rbx,%rsi
903         subq    %rbx,%rcx
904         movq    $512,%rbx
905         jnz     .Lofb_loop
906         cmpq    %rbp,%rsp
907         je      .Lofb_done
908
909         pxor    %xmm0,%xmm0
910         leaq    (%rsp),%rax
911 .Lofb_bzero:
912         movaps  %xmm0,(%rax)
913         leaq    16(%rax),%rax
914         cmpq    %rax,%rbp
915         ja      .Lofb_bzero
916
917 .Lofb_done:
918         leaq    (%rbp),%rsp
919         jmp     .Lofb_exit
920
921 .p2align        4
922 .Lofb_aligned:
923         leaq    -16(%rdx),%rax
924         leaq    16(%rdx),%rbx
925         shrq    $4,%rcx
926 .byte   0xf3,0x0f,0xa7,232      
927         movdqa  (%rax),%xmm0
928         movdqa  %xmm0,-16(%rdx)
929 .Lofb_exit:
930         movl    $1,%eax
931         leaq    8(%rsp),%rsp
932 .Lofb_abort:
933         popq    %rbx
934         popq    %rbp
935         movq    8(%rsp),%rdi
936         movq    16(%rsp),%rsi
937         .byte   0xf3,0xc3
938 .LSEH_end_padlock_ofb_encrypt:
939 .globl  padlock_ctr32_encrypt
940 .def    padlock_ctr32_encrypt;  .scl 2; .type 32;       .endef
941 .p2align        4
942 padlock_ctr32_encrypt:
943         movq    %rdi,8(%rsp)
944         movq    %rsi,16(%rsp)
945         movq    %rsp,%rax
946 .LSEH_begin_padlock_ctr32_encrypt:
947         movq    %rcx,%rdi
948         movq    %rdx,%rsi
949         movq    %r8,%rdx
950         movq    %r9,%rcx
951
952         pushq   %rbp
953         pushq   %rbx
954
955         xorl    %eax,%eax
956         testq   $15,%rdx
957         jnz     .Lctr32_abort
958         testq   $15,%rcx
959         jnz     .Lctr32_abort
960         leaq    .Lpadlock_saved_context(%rip),%rax
961         pushf
962         cld
963         call    _padlock_verify_ctx
964         leaq    16(%rdx),%rdx
965         xorl    %eax,%eax
966         xorl    %ebx,%ebx
967         testl   $32,(%rdx)
968         jnz     .Lctr32_aligned
969         testq   $15,%rdi
970         setz    %al
971         testq   $15,%rsi
972         setz    %bl
973         testl   %ebx,%eax
974         jnz     .Lctr32_aligned
975         negq    %rax
976         movq    $512,%rbx
977         notq    %rax
978         leaq    (%rsp),%rbp
979         cmpq    %rbx,%rcx
980         cmovcq  %rcx,%rbx
981         andq    %rbx,%rax
982         movq    %rcx,%rbx
983         negq    %rax
984         andq    $512-1,%rbx
985         leaq    (%rax,%rbp,1),%rsp
986         movq    $512,%rax
987         cmovzq  %rax,%rbx
988 .Lctr32_reenter:
989         movl    -4(%rdx),%eax
990         bswapl  %eax
991         negl    %eax
992         andl    $31,%eax
993         movq    $512,%rbx
994         shll    $4,%eax
995         cmovzq  %rbx,%rax
996         cmpq    %rax,%rcx
997         cmovaq  %rax,%rbx
998         cmovbeq %rcx,%rbx
999         cmpq    %rbx,%rcx
1000         ja      .Lctr32_loop
1001         movq    %rsi,%rax
1002         cmpq    %rsp,%rbp
1003         cmoveq  %rdi,%rax
1004         addq    %rcx,%rax
1005         negq    %rax
1006         andq    $4095,%rax
1007         cmpq    $32,%rax
1008         movq    $-32,%rax
1009         cmovaeq %rbx,%rax
1010         andq    %rax,%rbx
1011         jz      .Lctr32_unaligned_tail
1012         jmp     .Lctr32_loop
.p2align        4
.Lctr32_loop:
        # One chunk, staged through the stack when in/out are not 16-byte
        # aligned.  Saved across the hardware call:
        #   %r8 = real out, %r9 = real in, %r10 = total bytes left,
        #   %r11 = this chunk's byte count.
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx               # chunk = min(chunk, remaining)
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi               # misaligned out: write into bounce buffer
        testq   $15,%rsi
        jz      .Lctr32_inp_aligned
        # Misaligned input: copy the chunk into the bounce buffer first.
        shrq    $3,%rcx                 # bytes -> qwords for the copy
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi               # rewind to start of copied chunk
        movq    %rbx,%rcx
        movq    %rdi,%rsi               # encrypt from the aligned copy
.Lctr32_inp_aligned:
        leaq    -16(%rdx),%rax          # %rax -> control word (ctx - 16)
        leaq    16(%rdx),%rbx           # %rbx -> expanded key (ctx + 16)
        shrq    $4,%rcx                 # bytes -> 16-byte block count
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr (advances %rsi/%rdi like rep string ops)
        # The hardware does not carry past the counter's low 16 bits;
        # when they wrapped to zero, add 0x10000 in software.  NOTE: %eax
        # holds the raw little-endian load of the big-endian counter word,
        # so mask 0xffff0000 covers the counter's LOW two bytes.
        movl    -4(%rdx),%eax
        testl   $4294901760,%eax
        jnz     .Lctr32_no_carry
        bswapl  %eax
        addl    $65536,%eax             # propagate carry into counter bits 16+
        bswapl  %eax
        movl    %eax,-4(%rdx)
.Lctr32_no_carry:
        movq    %r8,%rdi                # restore real output pointer
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lctr32_out_aligned
        # Misaligned output: copy the result out of the bounce buffer.
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi               # rewind: advanced uniformly below
.Lctr32_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi               # out += chunk
        addq    %rbx,%rsi               # in  += chunk
        subq    %rbx,%rcx               # remaining -= chunk
        movq    $512,%rbx
        jz      .Lctr32_break           # all bytes processed
        cmpq    %rbx,%rcx
        jae     .Lctr32_loop            # at least one full 512-byte chunk left
        movq    %rcx,%rbx
        # Short tail: same page-boundary guard as at entry -- if the tail
        # ends within 32 bytes of a page edge, bounce it via the stack.
        movq    %rsi,%rax
        cmpq    %rsp,%rbp
        cmoveq  %rdi,%rax
        addq    %rcx,%rax
        negq    %rax
        andq    $4095,%rax              # distance to next 4K page boundary
        cmpq    $32,%rax
        movq    $-32,%rax
        cmovaeq %rbx,%rax
        andq    %rax,%rbx
        jnz     .Lctr32_loop
.Lctr32_unaligned_tail:
        # Tail that would end too close to a page boundary: stage the input
        # through the stack, then re-enter the main loop on the staged copy.
        xorl    %eax,%eax
        cmpq    %rsp,%rbp
        cmoveq  %rcx,%rax               # grow buffer only if none carved yet
        movq    %rdi,%r8                # keep real output pointer
        movq    %rcx,%rbx
        subq    %rax,%rsp
        shrq    $3,%rcx                 # bytes -> qwords
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        movq    %rsp,%rsi               # read tail from the stack copy
        movq    %r8,%rdi
        movq    %rbx,%rcx
        jmp     .Lctr32_loop
.p2align        4
.Lctr32_break:
        cmpq    %rbp,%rsp
        je      .Lctr32_done            # no bounce buffer was used

        # Scrub the bounce buffer: it held plaintext/key-stream material.
        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lctr32_bzero:
        movaps  %xmm0,(%rax)            # buffer is 16-byte aligned by construction
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lctr32_bzero

.Lctr32_done:
        leaq    (%rbp),%rsp             # release the bounce buffer
        jmp     .Lctr32_exit
.p2align        4
.Lctr32_aligned:
        # Fast path: buffers already 16-byte aligned, no bouncing needed.
        # Chunk so the counter's low 16 bits never wrap mid-call:
        # up to 65536 blocks = 1 MB per hardware invocation.
        movl    -4(%rdx),%eax
        bswapl  %eax                    # big-endian counter -> host order
        negl    %eax
        andl    $65535,%eax             # blocks until low 16 counter bits wrap
        movq    $1048576,%rbx
        shll    $4,%eax                 # blocks -> bytes
        cmovzq  %rbx,%rax               # 0 means a full 1 MB chunk
        cmpq    %rax,%rcx
        cmovaq  %rax,%rbx               # %rbx = min(limit, remaining)
        cmovbeq %rcx,%rbx
        jbe     .Lctr32_aligned_skip    # fits in one chunk: skip the loop

.Lctr32_aligned_loop:
        movq    %rcx,%r10               # %r10 = total remaining
        movq    %rbx,%rcx
        movq    %rbx,%r11               # %r11 = this chunk's bytes

        leaq    -16(%rdx),%rax          # control word
        leaq    16(%rdx),%rbx           # key schedule
        shrq    $4,%rcx                 # bytes -> blocks
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr (advances %rsi/%rdi itself)

        # Each loop chunk ends exactly on a 16-bit counter boundary, so
        # unconditionally propagate the carry into the upper counter bits.
        movl    -4(%rdx),%eax
        bswapl  %eax
        addl    $65536,%eax
        bswapl  %eax
        movl    %eax,-4(%rdx)

        movq    %r10,%rcx
        subq    %r11,%rcx               # remaining -= chunk
        movq    $1048576,%rbx
        jz      .Lctr32_exit            # done, nothing left
        cmpq    %rbx,%rcx
        jae     .Lctr32_aligned_loop

.Lctr32_aligned_skip:
        # If the input tail ends within 32 bytes of a 4K page edge, split
        # off that remainder (%rbp bytes, 0..31) to bounce via the stack.
        leaq    (%rsi,%rcx,1),%rbp
        negq    %rbp
        andq    $4095,%rbp              # distance from end of input to page edge
        xorl    %eax,%eax
        cmpq    $32,%rbp
        movq    $32-1,%rbp
        cmovaeq %rax,%rbp               # far enough from the edge: divert nothing
        andq    %rcx,%rbp               # %rbp = tail bytes to divert
        subq    %rbp,%rcx
        jz      .Lctr32_aligned_tail    # everything is tail: bounce it all
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr on the safe bulk
        testq   %rbp,%rbp
        jz      .Lctr32_exit            # no diverted tail remains

.Lctr32_aligned_tail:
        # Encrypt the final sub-32-byte tail via a stack bounce buffer so
        # the engine never reads/writes adjacent to an unmapped page, then
        # let the generic loop finish it.
        movq    %rdi,%r8                # %r8 = real output pointer
        movq    %rbp,%rbx               # %rbx = tail length in bytes
        movq    %rbp,%rcx
        leaq    (%rsp),%rbp             # %rbp = frame base for .Lctr32_break cleanup
        subq    %rcx,%rsp
        shrq    $3,%rcx                 # bytes -> qwords
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq: copy tail onto the stack
        leaq    (%r8),%rdi
        leaq    (%rsp),%rsi             # read from the staged copy
        movq    %rbx,%rcx
        jmp     .Lctr32_loop
.Lctr32_exit:
        movl    $1,%eax                 # return 1 (success)
        leaq    8(%rsp),%rsp            # drop alignment padding from the prologue
.Lctr32_abort:
        popq    %rbx                    # restore callee-saved registers
        popq    %rbp
        movq    8(%rsp),%rdi            # Win64: %rdi/%rsi are callee-saved,
        movq    16(%rsp),%rsi           # reloaded from their home slots
        .byte   0xf3,0xc3               # rep ret
.LSEH_end_padlock_ctr32_encrypt:
# ASCII, NUL-terminated:
# "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>"
.byte   86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align        4
.data   
.p2align        3
# 8-byte scratch slot; presumably holds a saved context pointer for the
# PadLock SSE/SEH glue defined elsewhere in this file -- confirm there.
.Lpadlock_saved_context:
.quad   0