# lib/accelerated/x86/elf/appro-aes-gcm-x86-64.s (gnutls.git)
# Copyright (c) 2011-2012, Andy Polyakov <appro@openssl.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
#     * Redistributions of source code must retain copyright notices,
#      this list of conditions and the following disclaimer.
#
#     * Redistributions in binary form must reproduce the above
#      copyright notice, this list of conditions and the following
#      disclaimer in the documentation and/or other materials
#      provided with the distribution.
#
#     * Neither the name of the Andy Polyakov nor the names of its
#      copyright holder and contributors may be used to endorse or
#      promote products derived from this software without specific
#      prior written permission.
#
# ALTERNATIVELY, provided that this notice is retained in full, this
# product may be distributed under the terms of the GNU General Public
# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
# those given above.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# *** This file is auto-generated ***
#
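# GHASH for x86_64: multiplication in GF(2^128) as used by AES-GCM, in
# two flavors: a 4-bit table-driven implementation (gcm_gmult_4bit,
# gcm_ghash_4bit) and a PCLMULQDQ carry-less-multiply implementation
# (gcm_init_clmul, gcm_gmult_clmul, gcm_ghash_clmul); the gcm_*_avx
# entry points near the end simply alias the clmul code. The expected
# C prototypes, as declared by the OpenSSL/GnuTLS GCM callers (u128
# being the 16-byte Htable element type), are:
#
#   void gcm_init_clmul(u128 Htable[16], const u64 Xi[2]);
#   void gcm_gmult_4bit(u64 Xi[2], const u128 Htable[16]);
#   void gcm_ghash_4bit(u64 Xi[2], const u128 Htable[16],
#                       const u8 *inp, size_t len);
#
# The .byte sequences below hand-encode SSSE3/PCLMULQDQ instructions
# (pshufb, palignr, pclmulqdq, movq) for the benefit of older
# assemblers; .byte 0xf3,0xc3 encodes "rep ret".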
.text

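# gcm_gmult_4bit: multiply the hash value Xi (%rdi) by H in GF(2^128),
# one nibble at a time, using the per-key lookup table Htable (%rsi)
# and the .Lrem_4bit reduction constants.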
.globl  gcm_gmult_4bit
.type   gcm_gmult_4bit,@function
.align  16
gcm_gmult_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
.Lgmult_prologue:

        movzbq  15(%rdi),%r8
        leaq    .Lrem_4bit(%rip),%r11
        xorq    %rax,%rax
        xorq    %rbx,%rbx
        movb    %r8b,%al
        movb    %r8b,%bl
        shlb    $4,%al
        movq    $14,%rcx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        movq    %r8,%rdx
        jmp     .Loop1

.align  16
.Loop1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        movb    (%rdi,%rcx,1),%al
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        movb    %al,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        shlb    $4,%al
        xorq    %r10,%r8
        decq    %rcx
        js      .Lbreak1

        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8
        jmp     .Loop1

.align  16
.Lbreak1:
        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rax,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rax,1),%r9
        andb    $240,%bl
        xorq    (%r11,%rdx,8),%r9
        movq    %r8,%rdx
        xorq    %r10,%r8

        shrq    $4,%r8
        andq    $15,%rdx
        movq    %r9,%r10
        shrq    $4,%r9
        xorq    8(%rsi,%rbx,1),%r8
        shlq    $60,%r10
        xorq    (%rsi,%rbx,1),%r9
        xorq    %r10,%r8
        xorq    (%r11,%rdx,8),%r9

        bswapq  %r8
        bswapq  %r9
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

        movq    16(%rsp),%rbx
        leaq    24(%rsp),%rsp
.Lgmult_epilogue:
        .byte   0xf3,0xc3
.size   gcm_gmult_4bit,.-gcm_gmult_4bit
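# gcm_ghash_4bit: hash len bytes (%rcx, a multiple of 16) at inp (%rdx)
# into Xi (%rdi). The prologue re-packs Htable (%rsi) into a pre-shifted
# 256-byte copy at 16(%rsp) plus a 16-byte nibble table at (%rsp); the
# main loop then processes one 16-byte block per iteration, reducing
# via .Lrem_8bit.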
.globl  gcm_ghash_4bit
.type   gcm_ghash_4bit,@function
.align  16
gcm_ghash_4bit:
        pushq   %rbx
        pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
        pushq   %r15
        subq    $280,%rsp
.Lghash_prologue:
        movq    %rdx,%r14
        movq    %rcx,%r15
        subq    $-128,%rsi
        leaq    16+128(%rsp),%rbp
        xorl    %edx,%edx
        movq    0+0-128(%rsi),%r8
        movq    0+8-128(%rsi),%rax
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    16+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    16+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,0(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,0(%rbp)
        movq    32+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,0-128(%rbp)
        movq    32+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,1(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,8(%rbp)
        movq    48+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,8-128(%rbp)
        movq    48+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,2(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,16(%rbp)
        movq    64+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,16-128(%rbp)
        movq    64+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,3(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,24(%rbp)
        movq    80+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,24-128(%rbp)
        movq    80+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,4(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,32(%rbp)
        movq    96+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,32-128(%rbp)
        movq    96+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,5(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,40(%rbp)
        movq    112+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,40-128(%rbp)
        movq    112+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,6(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,48(%rbp)
        movq    128+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,48-128(%rbp)
        movq    128+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,7(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,56(%rbp)
        movq    144+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,56-128(%rbp)
        movq    144+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,8(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,64(%rbp)
        movq    160+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,64-128(%rbp)
        movq    160+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,9(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,72(%rbp)
        movq    176+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,72-128(%rbp)
        movq    176+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,10(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,80(%rbp)
        movq    192+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,80-128(%rbp)
        movq    192+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,11(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,88(%rbp)
        movq    208+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,88-128(%rbp)
        movq    208+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,12(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,96(%rbp)
        movq    224+0-128(%rsi),%r8
        shlb    $4,%dl
        movq    %rax,96-128(%rbp)
        movq    224+8-128(%rsi),%rax
        shlq    $60,%r10
        movb    %dl,13(%rsp)
        orq     %r10,%rbx
        movb    %al,%dl
        shrq    $4,%rax
        movq    %r8,%r10
        shrq    $4,%r8
        movq    %r9,104(%rbp)
        movq    240+0-128(%rsi),%r9
        shlb    $4,%dl
        movq    %rbx,104-128(%rbp)
        movq    240+8-128(%rsi),%rbx
        shlq    $60,%r10
        movb    %dl,14(%rsp)
        orq     %r10,%rax
        movb    %bl,%dl
        shrq    $4,%rbx
        movq    %r9,%r10
        shrq    $4,%r9
        movq    %r8,112(%rbp)
        shlb    $4,%dl
        movq    %rax,112-128(%rbp)
        shlq    $60,%r10
        movb    %dl,15(%rsp)
        orq     %r10,%rbx
        movq    %r9,120(%rbp)
        movq    %rbx,120-128(%rbp)
        addq    $-128,%rsi
        movq    8(%rdi),%r8
        movq    0(%rdi),%r9
        addq    %r14,%r15
        leaq    .Lrem_8bit(%rip),%r11
        jmp     .Louter_loop
.align  16
.Louter_loop:
        xorq    (%r14),%r9
        movq    8(%r14),%rdx
        leaq    16(%r14),%r14
        xorq    %r8,%rdx
        movq    %r9,(%rdi)
        movq    %rdx,8(%rdi)
        shrq    $32,%rdx
        xorq    %rax,%rax
        roll    $8,%edx
        movb    %dl,%al
        movzbl  %dl,%ebx
        shlb    $4,%al
        shrl    $4,%ebx
        roll    $8,%edx
        movq    8(%rsi,%rax,1),%r8
        movq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        xorq    %r8,%r12
        movq    %r9,%r10
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    8(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    0(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        shrl    $4,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r12,2),%r12
        movzbl  %dl,%ebx
        shlb    $4,%al
        movzbq  (%rsp,%rcx,1),%r13
        shrl    $4,%ebx
        shlq    $48,%r12
        xorq    %r8,%r13
        movq    %r9,%r10
        xorq    %r12,%r9
        shrq    $8,%r8
        movzbq  %r13b,%r13
        shrq    $8,%r9
        xorq    -128(%rbp,%rcx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rcx,8),%r9
        roll    $8,%edx
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        movb    %dl,%al
        xorq    %r10,%r8
        movzwq  (%r11,%r13,2),%r13
        movzbl  %dl,%ecx
        shlb    $4,%al
        movzbq  (%rsp,%rbx,1),%r12
        andl    $240,%ecx
        shlq    $48,%r13
        xorq    %r8,%r12
        movq    %r9,%r10
        xorq    %r13,%r9
        shrq    $8,%r8
        movzbq  %r12b,%r12
        movl    -4(%rdi),%edx
        shrq    $8,%r9
        xorq    -128(%rbp,%rbx,8),%r8
        shlq    $56,%r10
        xorq    (%rbp,%rbx,8),%r9
        movzwq  (%r11,%r12,2),%r12
        xorq    8(%rsi,%rax,1),%r8
        xorq    (%rsi,%rax,1),%r9
        shlq    $48,%r12
        xorq    %r10,%r8
        xorq    %r12,%r9
        movzbq  %r8b,%r13
        shrq    $4,%r8
        movq    %r9,%r10
        shlb    $4,%r13b
        shrq    $4,%r9
        xorq    8(%rsi,%rcx,1),%r8
        movzwq  (%r11,%r13,2),%r13
        shlq    $60,%r10
        xorq    (%rsi,%rcx,1),%r9
        xorq    %r10,%r8
        shlq    $48,%r13
        bswapq  %r8
        xorq    %r13,%r9
        bswapq  %r9
        cmpq    %r15,%r14
        jb      .Louter_loop
        movq    %r8,8(%rdi)
        movq    %r9,(%rdi)

        leaq    280(%rsp),%rsi
        movq    0(%rsi),%r15
        movq    8(%rsi),%r14
        movq    16(%rsi),%r13
        movq    24(%rsi),%r12
        movq    32(%rsi),%rbp
        movq    40(%rsi),%rbx
        leaq    48(%rsi),%rsp
.Lghash_epilogue:
        .byte   0xf3,0xc3
.size   gcm_ghash_4bit,.-gcm_ghash_4bit
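# gcm_init_clmul: derive the PCLMULQDQ key schedule from the raw hash
# key at %rsi: H, H^2, H^3 and H^4 are computed and stored in Htable at
# %rdi, together with pre-xored Karatsuba helper values at offsets 32
# and 80.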
.globl  gcm_init_clmul
.type   gcm_init_clmul,@function
.align  16
gcm_init_clmul:
.L_init_clmul:
        movdqu  (%rsi),%xmm2
        pshufd  $78,%xmm2,%xmm2


        pshufd  $255,%xmm2,%xmm4
        movdqa  %xmm2,%xmm3
        psllq   $1,%xmm2
        pxor    %xmm5,%xmm5
        psrlq   $63,%xmm3
        pcmpgtd %xmm4,%xmm5
        pslldq  $8,%xmm3
        por     %xmm3,%xmm2


        pand    .L0x1c2_polynomial(%rip),%xmm5
        pxor    %xmm5,%xmm2


        pshufd  $78,%xmm2,%xmm6
        movdqa  %xmm2,%xmm0
        pxor    %xmm2,%xmm6
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        pshufd  $78,%xmm2,%xmm3
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm2,%xmm3
        movdqu  %xmm2,0(%rdi)
        pxor    %xmm0,%xmm4
        movdqu  %xmm0,16(%rdi)
.byte   102,15,58,15,227,8
        movdqu  %xmm4,32(%rdi)
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        movdqa  %xmm0,%xmm5
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,222,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        pshufd  $78,%xmm5,%xmm3
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm5,%xmm3
        movdqu  %xmm5,48(%rdi)
        pxor    %xmm0,%xmm4
        movdqu  %xmm0,64(%rdi)
.byte   102,15,58,15,227,8
        movdqu  %xmm4,80(%rdi)
        .byte   0xf3,0xc3
.size   gcm_init_clmul,.-gcm_init_clmul
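# gcm_gmult_clmul: one GHASH multiplication of Xi (%rdi) by H via
# PCLMULQDQ: a Karatsuba multiply followed by reduction modulo the GCM
# polynomial; Xi is byte-reversed on load and store via .Lbswap_mask.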
.globl  gcm_gmult_clmul
.type   gcm_gmult_clmul,@function
.align  16
gcm_gmult_clmul:
.L_gmult_clmul:
        movdqu  (%rdi),%xmm0
        movdqa  .Lbswap_mask(%rip),%xmm5
        movdqu  (%rsi),%xmm2
        movdqu  32(%rsi),%xmm4
.byte   102,15,56,0,197
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,15,58,68,220,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
.byte   102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.size   gcm_gmult_clmul,.-gcm_gmult_clmul
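# gcm_ghash_clmul: PCLMULQDQ GHASH of len bytes (%rcx) at inp (%rdx)
# into Xi (%rdi). Blocks are processed four at a time in .Lmod4_loop
# using the precomputed powers of H, with two-block (.Lmod_loop,
# .Leven_tail) and single-block (.Lodd_tail) code for what remains.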
.globl  gcm_ghash_clmul
.type   gcm_ghash_clmul,@function
.align  32
gcm_ghash_clmul:
.L_ghash_clmul:
        movdqa  .Lbswap_mask(%rip),%xmm5
        movq    $11547335547999543296,%rax

        movdqu  (%rdi),%xmm0
        movdqu  (%rsi),%xmm2
        movdqu  32(%rsi),%xmm10
.byte   102,15,56,0,197

        subq    $16,%rcx
        jz      .Lodd_tail

        movdqu  16(%rsi),%xmm9
        cmpq    $48,%rcx
        jb      .Lskip4x

        subq    $48,%rcx
        movdqu  48(%rsi),%xmm14
        movdqu  64(%rsi),%xmm15




        movdqu  48(%rdx),%xmm6
        movdqu  32(%rdx),%xmm11
.byte   102,15,56,0,245
.byte   102,68,15,56,0,221
        movdqa  %xmm6,%xmm8
        pshufd  $78,%xmm6,%xmm7
        pxor    %xmm6,%xmm7
.byte   102,15,58,68,242,0
.byte   102,68,15,58,68,194,17
.byte   102,65,15,58,68,250,0

        movdqa  %xmm11,%xmm13
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm11,%xmm12
.byte   102,69,15,58,68,217,0
.byte   102,69,15,58,68,233,17
        xorps   %xmm11,%xmm6
.byte   102,69,15,58,68,226,16
        xorps   %xmm13,%xmm8
        movups  80(%rsi),%xmm10
        xorps   %xmm12,%xmm7

        movdqu  16(%rdx),%xmm11
        movdqu  0(%rdx),%xmm3
.byte   102,68,15,56,0,221
.byte   102,15,56,0,221
        movdqa  %xmm11,%xmm13
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm3,%xmm0
        pxor    %xmm11,%xmm12
.byte   102,69,15,58,68,222,0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,69,15,58,68,238,17
        xorps   %xmm11,%xmm6
.byte   102,69,15,58,68,226,0
        xorps   %xmm13,%xmm8

        leaq    64(%rdx),%rdx
        subq    $64,%rcx
        jc      .Ltail4x

        jmp     .Lmod4_loop
.align  32
.Lmod4_loop:
.byte   102,65,15,58,68,199,0
        xorps   %xmm12,%xmm7
        movdqu  48(%rdx),%xmm11
.byte   102,68,15,56,0,221
.byte   102,65,15,58,68,207,17
        xorps   %xmm6,%xmm0
        movdqu  32(%rdx),%xmm6
        movdqa  %xmm11,%xmm13
        pshufd  $78,%xmm11,%xmm12
.byte   102,65,15,58,68,218,16
        xorps   %xmm8,%xmm1
        pxor    %xmm11,%xmm12
.byte   102,15,56,0,245
        movups  32(%rsi),%xmm10
.byte   102,68,15,58,68,218,0
        xorps   %xmm7,%xmm3
        movdqa  %xmm6,%xmm8
        pshufd  $78,%xmm6,%xmm7

        pxor    %xmm0,%xmm3
        pxor    %xmm6,%xmm7
        pxor    %xmm1,%xmm3
        movdqa  %xmm3,%xmm4
        pslldq  $8,%xmm3
.byte   102,68,15,58,68,234,17
        psrldq  $8,%xmm4
        pxor    %xmm3,%xmm0
        movdqa  .L7_mask(%rip),%xmm3
        pxor    %xmm4,%xmm1
.byte   102,72,15,110,224

        pand    %xmm0,%xmm3
.byte   102,15,56,0,227
.byte   102,69,15,58,68,226,0
        pxor    %xmm0,%xmm4
        psllq   $57,%xmm4
        movdqa  %xmm4,%xmm3
        pslldq  $8,%xmm4
.byte   102,65,15,58,68,241,0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1
        movdqu  0(%rdx),%xmm3

        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
.byte   102,69,15,58,68,193,17
        xorps   %xmm11,%xmm6
        movdqu  16(%rdx),%xmm11
.byte   102,68,15,56,0,221
.byte   102,65,15,58,68,250,16
        xorps   %xmm13,%xmm8
        movups  80(%rsi),%xmm10
.byte   102,15,56,0,221
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0

        movdqa  %xmm11,%xmm13
        pxor    %xmm12,%xmm7
        pshufd  $78,%xmm11,%xmm12
        pxor    %xmm11,%xmm12
.byte   102,69,15,58,68,222,0
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1
        psrlq   $1,%xmm0
.byte   102,69,15,58,68,238,17
        xorps   %xmm11,%xmm6
        pxor    %xmm1,%xmm0

.byte   102,69,15,58,68,226,0
        xorps   %xmm13,%xmm8

        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3

        leaq    64(%rdx),%rdx
        subq    $64,%rcx
        jnc     .Lmod4_loop

.Ltail4x:
.byte   102,65,15,58,68,199,0
        xorps   %xmm12,%xmm7
.byte   102,65,15,58,68,207,17
        xorps   %xmm6,%xmm0
.byte   102,65,15,58,68,218,16
        xorps   %xmm8,%xmm1
        pxor    %xmm0,%xmm1
        pxor    %xmm7,%xmm3

        pxor    %xmm1,%xmm3
        pxor    %xmm0,%xmm1

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        addq    $64,%rcx
        jz      .Ldone
        movdqu  32(%rsi),%xmm10
        subq    $16,%rcx
        jz      .Lodd_tail
.Lskip4x:





        movdqu  (%rdx),%xmm3
        movdqu  16(%rdx),%xmm6
.byte   102,15,56,0,221
.byte   102,15,56,0,245
        pxor    %xmm3,%xmm0

        movdqa  %xmm6,%xmm8
        pshufd  $78,%xmm6,%xmm3
        pxor    %xmm6,%xmm3
.byte   102,15,58,68,242,0
.byte   102,68,15,58,68,194,17
.byte   102,65,15,58,68,218,0

        leaq    32(%rdx),%rdx
        subq    $32,%rcx
        jbe     .Leven_tail
        jmp     .Lmod_loop

.align  32
.Lmod_loop:
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm0,%xmm4

.byte   102,65,15,58,68,193,0
.byte   102,65,15,58,68,201,17
.byte   102,65,15,58,68,226,16

        pxor    %xmm6,%xmm0
        pxor    %xmm8,%xmm1
        movdqu  (%rdx),%xmm8
.byte   102,68,15,56,0,197
        movdqu  16(%rdx),%xmm6

        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3
        pxor    %xmm8,%xmm1
        pxor    %xmm3,%xmm4
.byte   102,15,56,0,245
        movdqa  %xmm4,%xmm3
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm6,%xmm8

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
.byte   102,15,58,68,242,0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1
        pshufd  $78,%xmm8,%xmm3
        pxor    %xmm8,%xmm3

.byte   102,68,15,58,68,194,17
        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
.byte   102,65,15,58,68,218,0
        pxor    %xmm1,%xmm0

        leaq    32(%rdx),%rdx
        subq    $32,%rcx
        ja      .Lmod_loop

.Leven_tail:
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm4
        pxor    %xmm0,%xmm4

.byte   102,65,15,58,68,193,0
.byte   102,65,15,58,68,201,17
.byte   102,65,15,58,68,226,16

        pxor    %xmm6,%xmm0
        pxor    %xmm8,%xmm1
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3
        pxor    %xmm3,%xmm4
        movdqa  %xmm4,%xmm3
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
        testq   %rcx,%rcx
        jnz     .Ldone

.Lodd_tail:
        movdqu  (%rdx),%xmm3
.byte   102,15,56,0,221
        pxor    %xmm3,%xmm0
        movdqa  %xmm0,%xmm1
        pshufd  $78,%xmm0,%xmm3
        pxor    %xmm0,%xmm3
.byte   102,15,58,68,194,0
.byte   102,15,58,68,202,17
.byte   102,65,15,58,68,218,0
        pxor    %xmm0,%xmm3
        pxor    %xmm1,%xmm3

        movdqa  %xmm3,%xmm4
        psrldq  $8,%xmm3
        pslldq  $8,%xmm4
        pxor    %xmm3,%xmm1
        pxor    %xmm4,%xmm0

        movdqa  %xmm0,%xmm4
        movdqa  %xmm0,%xmm3
        psllq   $5,%xmm0
        pxor    %xmm0,%xmm3
        psllq   $1,%xmm0
        pxor    %xmm3,%xmm0
        psllq   $57,%xmm0
        movdqa  %xmm0,%xmm3
        pslldq  $8,%xmm0
        psrldq  $8,%xmm3
        pxor    %xmm4,%xmm0
        pxor    %xmm3,%xmm1


        movdqa  %xmm0,%xmm4
        psrlq   $1,%xmm0
        pxor    %xmm4,%xmm1
        pxor    %xmm0,%xmm4
        psrlq   $5,%xmm0
        pxor    %xmm4,%xmm0
        psrlq   $1,%xmm0
        pxor    %xmm1,%xmm0
.Ldone:
.byte   102,15,56,0,197
        movdqu  %xmm0,(%rdi)
        .byte   0xf3,0xc3
.size   gcm_ghash_clmul,.-gcm_ghash_clmul
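# The *_avx entry points are aliases: this build generates no
# AVX-specific code, so they tail-jump to the clmul implementations.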
.globl  gcm_init_avx
.type   gcm_init_avx,@function
.align  32
gcm_init_avx:
        jmp     .L_init_clmul
.size   gcm_init_avx,.-gcm_init_avx
.globl  gcm_gmult_avx
.type   gcm_gmult_avx,@function
.align  32
gcm_gmult_avx:
        jmp     .L_gmult_clmul
.size   gcm_gmult_avx,.-gcm_gmult_avx
.globl  gcm_ghash_avx
.type   gcm_ghash_avx,@function
.align  32
gcm_ghash_avx:
        jmp     .L_ghash_clmul
.size   gcm_ghash_avx,.-gcm_ghash_avx
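# Constants: the pshufb byte-swap mask, the GCM reduction constant
# (0xc2 in the top byte), and masks used by the 4-block clmul loop.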
.align  64
.Lbswap_mask:
.byte   15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.L0x1c2_polynomial:
.byte   1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
.L7_mask:
.long   7,0,7,0
.L7_mask_poly:
.long   7,0,450,0
.align  64
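# Reduction tables for the table-driven code: precomputed values folded
# into the hash when it is shifted 4 (.Lrem_4bit) or 8 (.Lrem_8bit)
# bits at a time.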
.type   .Lrem_4bit,@object
.Lrem_4bit:
.long   0,0,0,471859200,0,943718400,0,610271232
.long   0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long   0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long   0,2441084928,0,2376073216,0,2847932416,0,3051356160
.type   .Lrem_8bit,@object
.Lrem_8bit:
.value  0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
.value  0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
.value  0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
.value  0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
.value  0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
.value  0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
.value  0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
.value  0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
.value  0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
.value  0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
.value  0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
.value  0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
.value  0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
.value  0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
.value  0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
.value  0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
.value  0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
.value  0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
.value  0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
.value  0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
.value  0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
.value  0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
.value  0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
.value  0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
.value  0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
.value  0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
.value  0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
.value  0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
.value  0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
.value  0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
.value  0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
.value  0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE

.byte   71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
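# (the .byte string above is the ASCII tag
#  "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>")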
.align  64

.section .note.GNU-stack,"",%progbits