# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain copyright notices,
#      this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
#     * Neither the name of the Andy Polyakov nor the names of its
#      copyright holder and contributors may be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
#
.text
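# padlock_capability() — probe for VIA PadLock.  CPUID leaf 0 must return
# the "CentaurHauls" vendor string; extended leaf 0xC0000001 then supplies
# the PadLock feature bits (ACE, PHE, PMM, RNG), which are returned in
# %eax, or 0 on any other CPU.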
.globl  padlock_capability
.type   padlock_capability,@function
.align  16
padlock_capability:
        movq    %rbx,%r8
        xorl    %eax,%eax
        cpuid
        xorl    %eax,%eax
        cmpl    $1953391939,%ebx        # "Cent"
        jne     .Lnoluck
        cmpl    $1215460705,%edx        # "aurH"
        jne     .Lnoluck
        cmpl    $1936487777,%ecx        # "auls"
        jne     .Lnoluck
        movl    $3221225472,%eax        # 0xC0000000, Centaur extended leaf
        cpuid
        movl    %eax,%edx
        xorl    %eax,%eax
        cmpl    $3221225473,%edx        # leaf 0xC0000001 supported?
        jb      .Lnoluck
        movl    $3221225473,%eax        # 0xC0000001, extended feature flags
        cpuid
        movl    %edx,%eax
        andl    $4294967279,%eax
        orl     $16,%eax
.Lnoluck:
        movq    %r8,%rbx
        .byte   0xf3,0xc3               # repz ret
.size   padlock_capability,.-padlock_capability

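# padlock_key_bswap(key) byte-swaps 32-bit words of an expanded AES key in
# place, flipping the key schedule between little- and big-endian word
# order.  The word count is loaded from offset 240 of the structure (the
# round-count field in an OpenSSL-style AES_KEY layout).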
.globl  padlock_key_bswap
.type   padlock_key_bswap,@function
.align  16
padlock_key_bswap:
        movl    240(%rdi),%edx
.Lbswap_loop:
        movl    (%rdi),%eax
        bswapl  %eax
        movl    %eax,(%rdi)
        leaq    4(%rdi),%rdi
        subl    $1,%edx
        jnz     .Lbswap_loop
        .byte   0xf3,0xc3               # repz ret
.size   padlock_key_bswap,.-padlock_key_bswap

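# padlock_verify_context(ctx) guards against switching cipher contexts
# behind the PadLock unit's back: the last context used is remembered in
# .Lpadlock_saved_context, and if ctx differs a pushf/popf sequence forces
# the engine to reload its control word before the next xcrypt.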
.globl  padlock_verify_context
.type   padlock_verify_context,@function
.align  16
padlock_verify_context:
        movq    %rdi,%rdx
        pushf
        leaq    .Lpadlock_saved_context(%rip),%rax
        call    _padlock_verify_ctx
        leaq    8(%rsp),%rsp            # discard the flags pushed above
        .byte   0xf3,0xc3               # repz ret
.size   padlock_verify_context,.-padlock_verify_context

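# _padlock_verify_ctx is the shared tail used by the bulk routines below:
# on entry %rdx holds the context, %rax points at .Lpadlock_saved_context,
# and the caller's pushf value sits on the stack just above the return
# address.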
.type   _padlock_verify_ctx,@function
.align  16
_padlock_verify_ctx:
        movq    8(%rsp),%r8
        btq     $30,%r8
        jnc     .Lverified
        cmpq    (%rax),%rdx
        je      .Lverified
        pushf                           # flag toggle forces a context reload
        popf
.Lverified:
        movq    %rdx,(%rax)
        .byte   0xf3,0xc3               # repz ret
.size   _padlock_verify_ctx,.-_padlock_verify_ctx

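# padlock_reload_key() just toggles the flags register; on PadLock hardware
# this is the conventional way to force the key/control word to be re-read
# before the next operation.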
.globl  padlock_reload_key
.type   padlock_reload_key,@function
.align  16
padlock_reload_key:
        pushf
        popf
        .byte   0xf3,0xc3               # repz ret
.size   padlock_reload_key,.-padlock_reload_key

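# padlock_aes_block(out, inp, ctx) processes a single 16-byte block with
# "rep xcryptecb".  The layout assumed for ctx matches the rest of this
# file: IV at offset 0, control word at offset 16 (-> %rdx), expanded key
# at offset 32 (-> %rbx); %rcx carries the block count (1).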
.globl  padlock_aes_block
.type   padlock_aes_block,@function
.align  16
padlock_aes_block:
        movq    %rbx,%r8
        movq    $1,%rcx                 # one block
        leaq    32(%rdx),%rbx           # key schedule
        leaq    16(%rdx),%rdx           # control word
.byte   0xf3,0x0f,0xa7,0xc8             # rep xcryptecb
        movq    %r8,%rbx
        .byte   0xf3,0xc3               # repz ret
.size   padlock_aes_block,.-padlock_aes_block

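# padlock_xstore(dst, ctrl) runs the PadLock RNG "xstore" instruction:
# random bytes are stored at %rdi with the quality factor taken from %edx
# (copied from the second argument), and the RNG status ends up in %eax.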
.globl  padlock_xstore
.type   padlock_xstore,@function
.align  16
padlock_xstore:
        movl    %esi,%edx
.byte   0x0f,0xa7,0xc0                  # xstore
        .byte   0xf3,0xc3               # repz ret
.size   padlock_xstore,.-padlock_xstore

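# padlock_sha1_oneshot(ctx, inp, len) hashes len bytes from inp into the
# SHA-1 state at ctx via "rep xsha1".  The state is staged through an
# aligned scratch area on the stack and copied back afterwards.  %rax = 0
# lets the hash unit apply its own final padding; the *_blocks variant
# below passes %rax = -1 so that only whole input blocks are processed and
# no padding is added.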
.globl  padlock_sha1_oneshot
.type   padlock_sha1_oneshot,@function
.align  16
padlock_sha1_oneshot:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
        subq    $128+8,%rsp
        movl    16(%rdi),%eax
        movaps  %xmm0,(%rsp)
        movq    %rsp,%rdi
        movl    %eax,16(%rsp)
        xorq    %rax,%rax
.byte   0xf3,0x0f,0xa6,0xc8             # rep xsha1
        movaps  (%rsp),%xmm0
        movl    16(%rsp),%eax
        addq    $128+8,%rsp
        movups  %xmm0,(%rdx)
        movl    %eax,16(%rdx)
        .byte   0xf3,0xc3               # repz ret
.size   padlock_sha1_oneshot,.-padlock_sha1_oneshot

.globl  padlock_sha1_blocks
.type   padlock_sha1_blocks,@function
.align  16
padlock_sha1_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
        subq    $128+8,%rsp
        movl    16(%rdi),%eax
        movaps  %xmm0,(%rsp)
        movq    %rsp,%rdi
        movl    %eax,16(%rsp)
        movq    $-1,%rax
.byte   0xf3,0x0f,0xa6,0xc8             # rep xsha1
        movaps  (%rsp),%xmm0
        movl    16(%rsp),%eax
        addq    $128+8,%rsp
        movups  %xmm0,(%rdx)
        movl    %eax,16(%rdx)
        .byte   0xf3,0xc3               # repz ret
.size   padlock_sha1_blocks,.-padlock_sha1_blocks

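# padlock_sha256_oneshot/padlock_sha256_blocks mirror the SHA-1 helpers
# above, staging the 32-byte SHA-256 state (two XMM registers) and using
# the "rep xsha256" opcode.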
.globl  padlock_sha256_oneshot
.type   padlock_sha256_oneshot,@function
.align  16
padlock_sha256_oneshot:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
        subq    $128+8,%rsp
        movups  16(%rdi),%xmm1
        movaps  %xmm0,(%rsp)
        movq    %rsp,%rdi
        movaps  %xmm1,16(%rsp)
        xorq    %rax,%rax
.byte   0xf3,0x0f,0xa6,0xd0             # rep xsha256
        movaps  (%rsp),%xmm0
        movaps  16(%rsp),%xmm1
        addq    $128+8,%rsp
        movups  %xmm0,(%rdx)
        movups  %xmm1,16(%rdx)
        .byte   0xf3,0xc3               # repz ret
.size   padlock_sha256_oneshot,.-padlock_sha256_oneshot

.globl  padlock_sha256_blocks
.type   padlock_sha256_blocks,@function
.align  16
padlock_sha256_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
        subq    $128+8,%rsp
        movups  16(%rdi),%xmm1
        movaps  %xmm0,(%rsp)
        movq    %rsp,%rdi
        movaps  %xmm1,16(%rsp)
        movq    $-1,%rax
.byte   0xf3,0x0f,0xa6,0xd0             # rep xsha256
        movaps  (%rsp),%xmm0
        movaps  16(%rsp),%xmm1
        addq    $128+8,%rsp
        movups  %xmm0,(%rdx)
        movups  %xmm1,16(%rdx)
        .byte   0xf3,0xc3               # repz ret
.size   padlock_sha256_blocks,.-padlock_sha256_blocks

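# padlock_sha512_blocks(ctx, inp, len) does the same for SHA-512 on CPUs
# whose PHE unit supports it: the 64-byte state travels through four XMM
# registers and the work is done by "rep xsha512".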
.globl  padlock_sha512_blocks
.type   padlock_sha512_blocks,@function
.align  16
padlock_sha512_blocks:
        movq    %rdx,%rcx
        movq    %rdi,%rdx
        movups  (%rdi),%xmm0
        subq    $128+8,%rsp
        movups  16(%rdi),%xmm1
        movups  32(%rdi),%xmm2
        movups  48(%rdi),%xmm3
        movaps  %xmm0,(%rsp)
        movq    %rsp,%rdi
        movaps  %xmm1,16(%rsp)
        movaps  %xmm2,32(%rsp)
        movaps  %xmm3,48(%rsp)
.byte   0xf3,0x0f,0xa6,0xe0             # rep xsha512
        movaps  (%rsp),%xmm0
        movaps  16(%rsp),%xmm1
        movaps  32(%rsp),%xmm2
        movaps  48(%rsp),%xmm3
        addq    $128+8,%rsp
        movups  %xmm0,(%rdx)
        movups  %xmm1,16(%rdx)
        movups  %xmm2,32(%rdx)
        movups  %xmm3,48(%rdx)
        .byte   0xf3,0xc3               # repz ret
.size   padlock_sha512_blocks,.-padlock_sha512_blocks
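
# padlock_ecb_encrypt(out, inp, ctx, len) is the bulk ECB entry point
# (whether it encrypts or decrypts is decided by the control word in ctx;
# the prototype is inferred from the register usage).  ctx and len must be
# 16-byte aligned or the routine bails out with 0.  Work is fed to
# "rep xcryptecb" in chunks of up to 512 bytes, and misaligned input or
# output is bounced through an on-stack buffer that is wiped with zeros
# before the function returns 1.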
.globl  padlock_ecb_encrypt
.type   padlock_ecb_encrypt,@function
.align  16
padlock_ecb_encrypt:
        pushq   %rbp
        pushq   %rbx

        xorl    %eax,%eax
        testq   $15,%rdx
        jnz     .Lecb_abort
        testq   $15,%rcx
        jnz     .Lecb_abort
        leaq    .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        leaq    16(%rdx),%rdx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        testl   $32,(%rdx)
        jnz     .Lecb_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
        jnz     .Lecb_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
        leaq    (%rsp),%rbp
        cmpq    %rbx,%rcx
        cmovcq  %rcx,%rbx
        andq    %rbx,%rax
        movq    %rcx,%rbx
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
        movq    $512,%rax
        cmovzq  %rax,%rbx
        cmpq    %rbx,%rcx
        ja      .Lecb_loop
        movq    %rsi,%rax
        cmpq    %rsp,%rbp
        cmoveq  %rdi,%rax
        addq    %rcx,%rax
        negq    %rax
        andq    $4095,%rax
        cmpq    $128,%rax
        movq    $-128,%rax
        cmovaeq %rbx,%rax
        andq    %rax,%rbx
        jz      .Lecb_unaligned_tail
        jmp     .Lecb_loop
.align  16
.Lecb_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
        jz      .Lecb_inp_aligned
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
.Lecb_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,200              # rep xcryptecb
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lecb_out_aligned
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
.Lecb_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
        jz      .Lecb_break
        cmpq    %rbx,%rcx
        jae     .Lecb_loop
.Lecb_unaligned_tail:
        xorl    %eax,%eax
        cmpq    %rsp,%rbp
        cmoveq  %rcx,%rax
        movq    %rdi,%r8
        movq    %rcx,%rbx
        subq    %rax,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        movq    %rsp,%rsi
        movq    %r8,%rdi
        movq    %rbx,%rcx
        jmp     .Lecb_loop
.align  16
.Lecb_break:
        cmpq    %rbp,%rsp
        je      .Lecb_done

        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lecb_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lecb_bzero

.Lecb_done:
        leaq    (%rbp),%rsp
        jmp     .Lecb_exit

.align  16
.Lecb_aligned:
        leaq    (%rsi,%rcx,1),%rbp
        negq    %rbp
        andq    $4095,%rbp
        xorl    %eax,%eax
        cmpq    $128,%rbp
        movq    $128-1,%rbp
        cmovaeq %rax,%rbp
        andq    %rcx,%rbp
        subq    %rbp,%rcx
        jz      .Lecb_aligned_tail
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,200              # rep xcryptecb
        testq   %rbp,%rbp
        jz      .Lecb_exit

.Lecb_aligned_tail:
        movq    %rdi,%r8
        movq    %rbp,%rbx
        movq    %rbp,%rcx
        leaq    (%rsp),%rbp
        subq    %rcx,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        leaq    (%r8),%rdi
        leaq    (%rsp),%rsi
        movq    %rbx,%rcx
        jmp     .Lecb_loop
.Lecb_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
.Lecb_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3               # repz ret
.size   padlock_ecb_encrypt,.-padlock_ecb_encrypt
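
# padlock_cbc_encrypt(out, inp, ctx, len): same framework as the ECB
# routine above, but drives "rep xcryptcbc", limits the unaligned tail
# handling to 64-byte steps, and stores the chaining value left by the
# hardware back into the IV field at the start of ctx after every chunk.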
.globl  padlock_cbc_encrypt
.type   padlock_cbc_encrypt,@function
.align  16
padlock_cbc_encrypt:
        pushq   %rbp
        pushq   %rbx

        xorl    %eax,%eax
        testq   $15,%rdx
        jnz     .Lcbc_abort
        testq   $15,%rcx
        jnz     .Lcbc_abort
        leaq    .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        leaq    16(%rdx),%rdx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        testl   $32,(%rdx)
        jnz     .Lcbc_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
        jnz     .Lcbc_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
        leaq    (%rsp),%rbp
        cmpq    %rbx,%rcx
        cmovcq  %rcx,%rbx
        andq    %rbx,%rax
        movq    %rcx,%rbx
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
        movq    $512,%rax
        cmovzq  %rax,%rbx
        cmpq    %rbx,%rcx
        ja      .Lcbc_loop
        movq    %rsi,%rax
        cmpq    %rsp,%rbp
        cmoveq  %rdi,%rax
        addq    %rcx,%rax
        negq    %rax
        andq    $4095,%rax
        cmpq    $64,%rax
        movq    $-64,%rax
        cmovaeq %rbx,%rax
        andq    %rax,%rbx
        jz      .Lcbc_unaligned_tail
        jmp     .Lcbc_loop
.align  16
.Lcbc_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
        jz      .Lcbc_inp_aligned
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
.Lcbc_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,208              # rep xcryptcbc
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lcbc_out_aligned
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
.Lcbc_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
        jz      .Lcbc_break
        cmpq    %rbx,%rcx
        jae     .Lcbc_loop
.Lcbc_unaligned_tail:
        xorl    %eax,%eax
        cmpq    %rsp,%rbp
        cmoveq  %rcx,%rax
        movq    %rdi,%r8
        movq    %rcx,%rbx
        subq    %rax,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        movq    %rsp,%rsi
        movq    %r8,%rdi
        movq    %rbx,%rcx
        jmp     .Lcbc_loop
.align  16
.Lcbc_break:
        cmpq    %rbp,%rsp
        je      .Lcbc_done

        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lcbc_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lcbc_bzero

.Lcbc_done:
        leaq    (%rbp),%rsp
        jmp     .Lcbc_exit

.align  16
.Lcbc_aligned:
        leaq    (%rsi,%rcx,1),%rbp
        negq    %rbp
        andq    $4095,%rbp
        xorl    %eax,%eax
        cmpq    $64,%rbp
        movq    $64-1,%rbp
        cmovaeq %rax,%rbp
        andq    %rcx,%rbp
        subq    %rbp,%rcx
        jz      .Lcbc_aligned_tail
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,208              # rep xcryptcbc
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
        testq   %rbp,%rbp
        jz      .Lcbc_exit

.Lcbc_aligned_tail:
        movq    %rdi,%r8
        movq    %rbp,%rbx
        movq    %rbp,%rcx
        leaq    (%rsp),%rbp
        subq    %rcx,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        leaq    (%r8),%rdi
        leaq    (%rsp),%rsi
        movq    %rbx,%rcx
        jmp     .Lcbc_loop
.Lcbc_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
.Lcbc_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3               # repz ret
.size   padlock_cbc_encrypt,.-padlock_cbc_encrypt
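
# padlock_cfb_encrypt(out, inp, ctx, len): CFB counterpart of the above,
# using "rep xcryptcfb" and propagating the updated IV back into ctx.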
.globl  padlock_cfb_encrypt
.type   padlock_cfb_encrypt,@function
.align  16
padlock_cfb_encrypt:
        pushq   %rbp
        pushq   %rbx

        xorl    %eax,%eax
        testq   $15,%rdx
        jnz     .Lcfb_abort
        testq   $15,%rcx
        jnz     .Lcfb_abort
        leaq    .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        leaq    16(%rdx),%rdx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        testl   $32,(%rdx)
        jnz     .Lcfb_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
        jnz     .Lcfb_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
        leaq    (%rsp),%rbp
        cmpq    %rbx,%rcx
        cmovcq  %rcx,%rbx
        andq    %rbx,%rax
        movq    %rcx,%rbx
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
        movq    $512,%rax
        cmovzq  %rax,%rbx
        jmp     .Lcfb_loop
.align  16
.Lcfb_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
        jz      .Lcfb_inp_aligned
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
.Lcfb_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,224              # rep xcryptcfb
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lcfb_out_aligned
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
.Lcfb_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
        jnz     .Lcfb_loop
        cmpq    %rbp,%rsp
        je      .Lcfb_done

        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lcfb_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lcfb_bzero

.Lcfb_done:
        leaq    (%rbp),%rsp
        jmp     .Lcfb_exit

.align  16
.Lcfb_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,224              # rep xcryptcfb
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
.Lcfb_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
.Lcfb_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3               # repz ret
.size   padlock_cfb_encrypt,.-padlock_cfb_encrypt
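
# padlock_ofb_encrypt(out, inp, ctx, len): OFB counterpart, driven by
# "rep xcryptofb", again writing the updated IV back into ctx.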
.globl  padlock_ofb_encrypt
.type   padlock_ofb_encrypt,@function
.align  16
padlock_ofb_encrypt:
        pushq   %rbp
        pushq   %rbx

        xorl    %eax,%eax
        testq   $15,%rdx
        jnz     .Lofb_abort
        testq   $15,%rcx
        jnz     .Lofb_abort
        leaq    .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        leaq    16(%rdx),%rdx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        testl   $32,(%rdx)
        jnz     .Lofb_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
        jnz     .Lofb_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
        leaq    (%rsp),%rbp
        cmpq    %rbx,%rcx
        cmovcq  %rcx,%rbx
        andq    %rbx,%rax
        movq    %rcx,%rbx
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
        movq    $512,%rax
        cmovzq  %rax,%rbx
        jmp     .Lofb_loop
.align  16
.Lofb_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
        jz      .Lofb_inp_aligned
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
.Lofb_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,232              # rep xcryptofb
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lofb_out_aligned
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
.Lofb_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
        jnz     .Lofb_loop
        cmpq    %rbp,%rsp
        je      .Lofb_done

        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lofb_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lofb_bzero

.Lofb_done:
        leaq    (%rbp),%rsp
        jmp     .Lofb_exit

.align  16
.Lofb_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,232              # rep xcryptofb
        movdqa  (%rax),%xmm0
        movdqa  %xmm0,-16(%rdx)
.Lofb_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
.Lofb_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3               # repz ret
.size   padlock_ofb_encrypt,.-padlock_ofb_encrypt
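
# padlock_ctr32_encrypt(out, inp, ctx, len): CTR mode with a 32-bit
# big-endian counter kept in the last four bytes of the IV field.  Chunks
# are sized so that one "rep xcryptctr" never wraps the low half of the
# counter, and the counter word in ctx is advanced by hand between chunks.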
.globl  padlock_ctr32_encrypt
.type   padlock_ctr32_encrypt,@function
.align  16
padlock_ctr32_encrypt:
        pushq   %rbp
        pushq   %rbx

        xorl    %eax,%eax
        testq   $15,%rdx
        jnz     .Lctr32_abort
        testq   $15,%rcx
        jnz     .Lctr32_abort
        leaq    .Lpadlock_saved_context(%rip),%rax
        pushf
        cld
        call    _padlock_verify_ctx
        leaq    16(%rdx),%rdx
        xorl    %eax,%eax
        xorl    %ebx,%ebx
        testl   $32,(%rdx)
        jnz     .Lctr32_aligned
        testq   $15,%rdi
        setz    %al
        testq   $15,%rsi
        setz    %bl
        testl   %ebx,%eax
        jnz     .Lctr32_aligned
        negq    %rax
        movq    $512,%rbx
        notq    %rax
        leaq    (%rsp),%rbp
        cmpq    %rbx,%rcx
        cmovcq  %rcx,%rbx
        andq    %rbx,%rax
        movq    %rcx,%rbx
        negq    %rax
        andq    $512-1,%rbx
        leaq    (%rax,%rbp,1),%rsp
        movq    $512,%rax
        cmovzq  %rax,%rbx
.Lctr32_reenter:
        movl    -4(%rdx),%eax
        bswapl  %eax
        negl    %eax
        andl    $31,%eax
        movq    $512,%rbx
        shll    $4,%eax
        cmovzq  %rbx,%rax
        cmpq    %rax,%rcx
        cmovaq  %rax,%rbx
        cmovbeq %rcx,%rbx
        cmpq    %rbx,%rcx
        ja      .Lctr32_loop
        movq    %rsi,%rax
        cmpq    %rsp,%rbp
        cmoveq  %rdi,%rax
        addq    %rcx,%rax
        negq    %rax
        andq    $4095,%rax
        cmpq    $32,%rax
        movq    $-32,%rax
        cmovaeq %rbx,%rax
        andq    %rax,%rbx
        jz      .Lctr32_unaligned_tail
        jmp     .Lctr32_loop
.align  16
.Lctr32_loop:
        cmpq    %rcx,%rbx
        cmovaq  %rcx,%rbx
        movq    %rdi,%r8
        movq    %rsi,%r9
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11
        testq   $15,%rdi
        cmovnzq %rsp,%rdi
        testq   $15,%rsi
        jz      .Lctr32_inp_aligned
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
        movq    %rbx,%rcx
        movq    %rdi,%rsi
.Lctr32_inp_aligned:
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr
        movl    -4(%rdx),%eax
        testl   $4294901760,%eax
        jnz     .Lctr32_no_carry
        bswapl  %eax
        addl    $65536,%eax
        bswapl  %eax
        movl    %eax,-4(%rdx)
.Lctr32_no_carry:
        movq    %r8,%rdi
        movq    %r11,%rbx
        testq   $15,%rdi
        jz      .Lctr32_out_aligned
        movq    %rbx,%rcx
        leaq    (%rsp),%rsi
        shrq    $3,%rcx
.byte   0xf3,0x48,0xa5                  # rep movsq
        subq    %rbx,%rdi
.Lctr32_out_aligned:
        movq    %r9,%rsi
        movq    %r10,%rcx
        addq    %rbx,%rdi
        addq    %rbx,%rsi
        subq    %rbx,%rcx
        movq    $512,%rbx
        jz      .Lctr32_break
        cmpq    %rbx,%rcx
        jae     .Lctr32_loop
        movq    %rcx,%rbx
        movq    %rsi,%rax
        cmpq    %rsp,%rbp
        cmoveq  %rdi,%rax
        addq    %rcx,%rax
        negq    %rax
        andq    $4095,%rax
        cmpq    $32,%rax
        movq    $-32,%rax
        cmovaeq %rbx,%rax
        andq    %rax,%rbx
        jnz     .Lctr32_loop
.Lctr32_unaligned_tail:
        xorl    %eax,%eax
        cmpq    %rsp,%rbp
        cmoveq  %rcx,%rax
        movq    %rdi,%r8
        movq    %rcx,%rbx
        subq    %rax,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        movq    %rsp,%rsi
        movq    %r8,%rdi
        movq    %rbx,%rcx
        jmp     .Lctr32_loop
.align  16
.Lctr32_break:
        cmpq    %rbp,%rsp
        je      .Lctr32_done

        pxor    %xmm0,%xmm0
        leaq    (%rsp),%rax
.Lctr32_bzero:
        movaps  %xmm0,(%rax)
        leaq    16(%rax),%rax
        cmpq    %rax,%rbp
        ja      .Lctr32_bzero

.Lctr32_done:
        leaq    (%rbp),%rsp
        jmp     .Lctr32_exit

.align  16
.Lctr32_aligned:
        movl    -4(%rdx),%eax
        bswapl  %eax
        negl    %eax
        andl    $65535,%eax
        movq    $1048576,%rbx
        shll    $4,%eax
        cmovzq  %rbx,%rax
        cmpq    %rax,%rcx
        cmovaq  %rax,%rbx
        cmovbeq %rcx,%rbx
        jbe     .Lctr32_aligned_skip

.Lctr32_aligned_loop:
        movq    %rcx,%r10
        movq    %rbx,%rcx
        movq    %rbx,%r11

        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr

        movl    -4(%rdx),%eax
        bswapl  %eax
        addl    $65536,%eax
        bswapl  %eax
        movl    %eax,-4(%rdx)

        movq    %r10,%rcx
        subq    %r11,%rcx
        movq    $1048576,%rbx
        jz      .Lctr32_exit
        cmpq    %rbx,%rcx
        jae     .Lctr32_aligned_loop

.Lctr32_aligned_skip:
        leaq    (%rsi,%rcx,1),%rbp
        negq    %rbp
        andq    $4095,%rbp
        xorl    %eax,%eax
        cmpq    $32,%rbp
        movq    $32-1,%rbp
        cmovaeq %rax,%rbp
        andq    %rcx,%rbp
        subq    %rbp,%rcx
        jz      .Lctr32_aligned_tail
        leaq    -16(%rdx),%rax
        leaq    16(%rdx),%rbx
        shrq    $4,%rcx
.byte   0xf3,0x0f,0xa7,216              # rep xcryptctr
        testq   %rbp,%rbp
        jz      .Lctr32_exit

.Lctr32_aligned_tail:
        movq    %rdi,%r8
        movq    %rbp,%rbx
        movq    %rbp,%rcx
        leaq    (%rsp),%rbp
        subq    %rcx,%rsp
        shrq    $3,%rcx
        leaq    (%rsp),%rdi
.byte   0xf3,0x48,0xa5                  # rep movsq
        leaq    (%r8),%rdi
        leaq    (%rsp),%rsi
        movq    %rbx,%rcx
        jmp     .Lctr32_loop
.Lctr32_exit:
        movl    $1,%eax
        leaq    8(%rsp),%rsp
.Lctr32_abort:
        popq    %rbx
        popq    %rbp
        .byte   0xf3,0xc3               # repz ret
.size   padlock_ctr32_encrypt,.-padlock_ctr32_encrypt
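
# The .byte string below is the CRYPTOGAMS identification banner,
# "VIA Padlock x86_64 module, CRYPTOGAMS by <appro@openssl.org>", and
# .Lpadlock_saved_context holds the last cipher context handed to the
# hardware (see _padlock_verify_ctx above).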
.byte   86,73,65,32,80,97,100,108,111,99,107,32,120,56,54,95,54,52,32,109,111,100,117,108,101,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align  16
.data
.align  8
.Lpadlock_saved_context:
.quad   0

.section .note.GNU-stack,"",%progbits