summaryrefslogtreecommitdiff
path: root/nss/lib/freebl/intel-aes.s
diff options
context:
space:
mode:
Diffstat (limited to 'nss/lib/freebl/intel-aes.s')
-rw-r--r--nss/lib/freebl/intel-aes.s284
1 files changed, 155 insertions, 129 deletions
diff --git a/nss/lib/freebl/intel-aes.s b/nss/lib/freebl/intel-aes.s
index a83529a..2dfcfa1 100644
--- a/nss/lib/freebl/intel-aes.s
+++ b/nss/lib/freebl/intel-aes.s
@@ -7,6 +7,20 @@
#define IV_OFFSET 16
#define EXPANDED_KEY_OFFSET 48
+/*
+ * Warning: the length values used in this module are "unsigned int"
+ * in C, which is 32-bit. When they're passed in registers, use only
+ * the low 32 bits, because the top half is unspecified.
+ *
+ * This is called from C code, so the contents of those bits can
+ * depend on the C compiler's optimization decisions. This means that
+ * mistakes might not be obvious in testing if those bits happen to be
+ * zero in your build.
+ *
+ * Exception: 32-bit lea instructions use a 64-bit address because the
+ * address size doesn't affect the result, and that form is more
+ * compactly encoded and preferred by compilers over a 32-bit address.
+ */
/* in %rdi : the key
in %rsi : buffer for expanded key
@@ -119,10 +133,11 @@ key_expansion128:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_128,@function
@@ -135,11 +150,11 @@ intel_aes_encrypt_ecb_128:
movdqu (%rdi), %xmm2
movdqu 160(%rdi), %xmm12
xor %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -260,11 +275,11 @@ intel_aes_encrypt_ecb_128:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -290,8 +305,8 @@ intel_aes_encrypt_ecb_128:
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcb /* aesenc %xmm11, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -302,10 +317,11 @@ intel_aes_encrypt_ecb_128:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_128,@function
@@ -318,11 +334,11 @@ intel_aes_decrypt_ecb_128:
movdqu (%rdi), %xmm2
movdqu 160(%rdi), %xmm12
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -443,11 +459,11 @@ intel_aes_decrypt_ecb_128:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -473,8 +489,8 @@ intel_aes_decrypt_ecb_128:
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm7, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -485,17 +501,18 @@ intel_aes_decrypt_ecb_128:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_128,@function
.globl intel_aes_encrypt_cbc_128
.align 16
intel_aes_encrypt_cbc_128:
- testq %r9, %r9
+ testl %r9d, %r9d
je 2f
// leaq IV_OFFSET(%rdi), %rdx
@@ -532,8 +549,8 @@ intel_aes_encrypt_cbc_128:
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcc /* aesenclast %xmm12, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 1b
movdqu %xmm0, (%rdx)
@@ -546,10 +563,11 @@ intel_aes_encrypt_cbc_128:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_128,@function
@@ -565,9 +583,9 @@ intel_aes_decrypt_cbc_128:
movdqu (%rdi), %xmm2 /* first key block */
movdqu 160(%rdi), %xmm12 /* last key block */
xorl %eax, %eax
- cmpq $128, %r9
+ cmpl $128, %r9d
jb 1f
- leaq -128(%r9), %r11
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3 /* 1st data block */
movdqu 16(%r8, %rax), %xmm4 /* 2d data block */
movdqu 32(%r8, %rax), %xmm5
@@ -704,10 +722,10 @@ intel_aes_decrypt_cbc_128:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
- addq $128, %rax
- cmpq %r11, %rax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -736,8 +754,8 @@ intel_aes_decrypt_cbc_128:
pxor %xmm0, %xmm1
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm13, %xmm0
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: movdqu %xmm0, (%rdx)
@@ -873,10 +891,11 @@ key_expansion192:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_192,@function
@@ -889,11 +908,11 @@ intel_aes_encrypt_ecb_192:
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -1033,11 +1052,11 @@ intel_aes_encrypt_ecb_192:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -1067,8 +1086,8 @@ intel_aes_encrypt_ecb_192:
.byte 0x66,0x41,0x0f,0x38,0xdc,0xcd /* aesenc %xmm13, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -1079,10 +1098,11 @@ intel_aes_encrypt_ecb_192:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_192,@function
@@ -1095,11 +1115,11 @@ intel_aes_decrypt_ecb_192:
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -1239,11 +1259,11 @@ intel_aes_decrypt_ecb_192:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -1273,8 +1293,8 @@ intel_aes_decrypt_ecb_192:
.byte 0x66,0x0f,0x38,0xde,0xcb /* aesdec %xmm3, %xmm1 */
.byte 0x66,0x0f,0x38,0xdf,0xca /* aesdeclast %xmm2, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -1285,17 +1305,18 @@ intel_aes_decrypt_ecb_192:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_192,@function
.globl intel_aes_encrypt_cbc_192
.align 16
intel_aes_encrypt_cbc_192:
- testq %r9, %r9
+ testl %r9d, %r9d
je 2f
// leaq IV_OFFSET(%rdi), %rdx
@@ -1336,8 +1357,8 @@ intel_aes_encrypt_cbc_192:
.byte 0x66,0x41,0x0f,0x38,0xdd,0xce /* aesenclast %xmm14, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 1b
movdqu %xmm0, (%rdx)
@@ -1350,10 +1371,11 @@ intel_aes_encrypt_cbc_192:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %exx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_192,@function
@@ -1367,9 +1389,9 @@ intel_aes_decrypt_cbc_192:
movdqu (%rdi), %xmm2
movdqu 192(%rdi), %xmm14
xorl %eax, %eax
- cmpq $128, %r9
+ cmpl $128, %r9d
jb 1f
- leaq -128(%r9), %r11
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -1525,10 +1547,10 @@ intel_aes_decrypt_cbc_192:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
- addq $128, %rax
- cmpq %r11, %rax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm3
@@ -1561,8 +1583,8 @@ intel_aes_decrypt_cbc_192:
pxor %xmm0, %xmm1
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm15, %xmm0
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: movdqu %xmm0, (%rdx)
@@ -1705,10 +1727,11 @@ key_expansion256:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_ecb_256,@function
@@ -1721,11 +1744,11 @@ intel_aes_encrypt_ecb_256:
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -1884,11 +1907,11 @@ intel_aes_encrypt_ecb_256:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu (%rdi), %xmm8
@@ -1924,8 +1947,8 @@ intel_aes_encrypt_ecb_256:
.byte 0x66,0x41,0x0f,0x38,0xdc,0xce /* aesenc %xmm14, %xmm1 */
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -1936,10 +1959,11 @@ intel_aes_encrypt_ecb_256:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_ecb_256,@function
@@ -1952,11 +1976,11 @@ intel_aes_decrypt_ecb_256:
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -2115,11 +2139,11 @@ intel_aes_decrypt_ecb_256:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm2
@@ -2155,8 +2179,8 @@ intel_aes_decrypt_ecb_256:
.byte 0x66,0x41,0x0f,0x38,0xdf,0xc8 /* aesdeclast %xmm8, %xmm1 */
movdqu 112(%rdi), %xmm8
movdqu %xmm1, (%rsi, %rax)
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: xor %eax, %eax
@@ -2167,17 +2191,18 @@ intel_aes_decrypt_ecb_256:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_encrypt_cbc_256,@function
.globl intel_aes_encrypt_cbc_256
.align 16
intel_aes_encrypt_cbc_256:
- testq %r9, %r9
+ testl %r9d, %r9d
je 2f
// leaq IV_OFFSET(%rdi), %rdx
@@ -2223,8 +2248,8 @@ intel_aes_encrypt_cbc_256:
.byte 0x66,0x41,0x0f,0x38,0xdd,0xcf /* aesenclast %xmm15, %xmm1 */
movdqu %xmm1, (%rsi, %rax)
movdqa %xmm1, %xmm0
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 1b
movdqu %xmm0, (%rdx)
@@ -2237,10 +2262,11 @@ intel_aes_encrypt_cbc_256:
/* in %rdi : cx - context
in %rsi : output - pointer to output buffer
in %rdx : outputLen - pointer to variable for length of output
- (filled by caller)
- in %rcx : maxOutputLen - length of output buffer
+ (already filled in by caller)
+ in %ecx : maxOutputLen - length of output buffer
+ (already checked by caller)
in %r8 : input - pointer to input buffer
- in %r9 : inputLen - length of input buffer
+ in %r9d : inputLen - length of input buffer
on stack: blocksize - AES blocksize (always 16, unused)
*/
.type intel_aes_decrypt_cbc_256,@function
@@ -2256,11 +2282,11 @@ intel_aes_decrypt_cbc_256:
movdqu (%rdi), %xmm2
movdqu 224(%rdi), %xmm15
xorl %eax, %eax
-// cmpq $8*16, %r9
- cmpq $128, %r9
+// cmpl $8*16, %r9d
+ cmpl $128, %r9d
jb 1f
-// leaq -8*16(%r9), %r11
- leaq -128(%r9), %r11
+// leal -8*16(%r9), %r11d
+ leal -128(%r9), %r11d
2: movdqu (%r8, %rax), %xmm3
movdqu 16(%r8, %rax), %xmm4
movdqu 32(%r8, %rax), %xmm5
@@ -2435,11 +2461,11 @@ intel_aes_decrypt_cbc_256:
movdqu %xmm8, 80(%rsi, %rax)
movdqu %xmm9, 96(%rsi, %rax)
movdqu %xmm10, 112(%rsi, %rax)
-// addq $8*16, %rax
- addq $128, %rax
- cmpq %r11, %rax
+// addl $8*16, %eax
+ addl $128, %eax
+ cmpl %r11d, %eax
jbe 2b
-1: cmpq %rax, %r9
+1: cmpl %eax, %r9d
je 5f
movdqu 16(%rdi), %xmm2
@@ -2477,8 +2503,8 @@ intel_aes_decrypt_cbc_256:
pxor %xmm0, %xmm1
movdqu (%r8, %rax), %xmm0 /* fetch the IV before we store the block */
movdqu %xmm1, (%rsi, %rax) /* in case input buf = output buf */
- addq $16, %rax
- cmpq %rax, %r9
+ addl $16, %eax
+ cmpl %eax, %r9d
jne 4b
5: movdqu %xmm0, (%rdx)