summaryrefslogtreecommitdiff
path: root/sysdeps/i386/i586/memcpy.S
blob: 206715482df14a8492339cfaace92cf034011e18 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
/* Highly optimized version for i586.
   Copyright (C) 1997-2013 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"
#include "bp-sym.h"
#include "bp-asm.h"

/* BEWARE: `#ifdef memcpy' means that memcpy is redefined as `mempcpy',
   and the return value is the byte after the last one copied in
   the destination.

   MEMPCPY_P is given a literal 0/1 value instead of being defined as
   `(defined memcpy)': a `defined' operator that appears as the result
   of macro expansion inside #if has undefined behavior in ISO C, and
   GCC diagnoses it with -Wexpansion-to-defined.  Nothing in this file
   defines or undefines memcpy after this point, so evaluating the
   condition here selects the same branches as before.  */
#ifdef memcpy
# define MEMPCPY_P 1
#else
# define MEMPCPY_P 0
#endif

/* Stack displacements of the arguments, relative to %esp after the
   function prologue has pushed %edi and %esi (2 * 4 bytes, hence the
   +8 in PARMS).  They are used below as DEST(%esp), SRC(%esp) and
   LEN(%esp).

   LINKAGE, RTN_SIZE and PTR_SIZE are not defined in this file.
   NOTE(review): presumably they come from the included sysdep.h /
   bp-asm.h headers -- confirm there.

   None of the expansions is parenthesized, so these names are only
   safe as plain displacements of the form NAME(%esp).  */
#define PARMS	LINKAGE+8	/* space for 2 saved regs */
#define RTN	PARMS		/* only used here, to derive DEST */
#define DEST	RTN+RTN_SIZE	/* destination pointer, loaded into %edi */
#define SRC	DEST+PTR_SIZE	/* source pointer, loaded into %esi */
#define LEN	SRC+PTR_SIZE	/* byte count, loaded into %ecx */

        .text
#if defined PIC && !defined NOT_IN_libc
/* __memcpy_chk: size check placed directly in front of memcpy.  Only
   assembled when PIC is defined and NOT_IN_libc is not.

   Loads the dword at 12(%esp) into %eax and compares the dword at
   16(%esp) against it; if the value at 16(%esp) is smaller (unsigned
   comparison) control is transferred to __chk_fail.
   NOTE(review): with the usual i386 stack-argument layout these two
   slots would be the length and the destination-object size of
   __memcpy_chk (dest, src, len, destlen) -- confirm against the
   prototype.

   There is no return or jump after the `jb', so when the check passes
   execution continues into the code assembled next, i.e. the memcpy
   entry point below.  Nothing has been pushed or popped here, so that
   code sees the stack exactly as it was on entry.  %eax and the flags
   are modified.  */
ENTRY (__memcpy_chk)
	/* %eax = dword at 12(%esp).  */
	movl	12(%esp), %eax
	/* Flags from 16(%esp) - %eax (AT&T operand order).  */
	cmpl	%eax, 16(%esp)
	/* Unsigned "below": taken when 16(%esp) < 12(%esp).  */
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memcpy_chk)
#endif
/* memcpy (or mempcpy, see MEMPCPY_P) for the i586.

   Copies the LEN(%esp) bytes starting at SRC(%esp) to DEST(%esp),
   always in ascending address order.  Result in %eax:
     - built as memcpy (MEMPCPY_P zero): the original DEST value;
     - built as mempcpy (MEMPCPY_P nonzero): %edi after the copy, i.e.
       the address just past the last byte written.

   Register use:
     %edi	destination cursor
     %esi	source cursor
     %ecx	count for `rep movsb' / biased remaining count in the loop
     %eax, %edx	scratch
   %edi and %esi are saved on the stack and restored before returning;
   %eax, %ecx, %edx and the flags are modified, and the direction flag
   is left cleared.

   Strategy: lengths of 32 bytes or less are copied by one `rep movsb'.
   Longer copies first byte-copy 0..3 bytes so that the destination is
   4-byte aligned, then move 32 bytes per iteration as eight dword
   load/store pairs, and finally copy the remaining tail bytes with
   `rep movsb'.

   ENTER, LEAVE, RET_PTR and CHECK_BOUNDS_BOTH_WIDE are macros defined
   outside this file.  NOTE(review): presumably bounded-pointer support
   from bp-asm.h -- confirm there.  */
ENTRY (BP_SYM (memcpy))
	ENTER

	/* Save the two registers used as copy cursors and keep the unwind
	   information in step with each push.  */
	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)

	/* Load the arguments.  After the two pushes %esi was saved at
	   0(%esp) and %edi at 4(%esp), which is what the cfi_rel_offset
	   annotations record.  */
	movl	DEST(%esp), %edi
	cfi_rel_offset (edi, 4)
	movl	SRC(%esp), %esi
	cfi_rel_offset (esi, 0)
	movl	LEN(%esp), %ecx
	CHECK_BOUNDS_BOTH_WIDE (%edi, DEST(%esp), %ecx)
	CHECK_BOUNDS_BOTH_WIDE (%esi, SRC(%esp), %ecx)
	/* %eax = DEST.  On the short path (jump to L(1) below) this is
	   still intact when the function returns.  */
	movl	%edi, %eax

	/* We need this in any case: clear the direction flag so that
	   `rep movsb' advances %esi and %edi upwards.  */
	cld

	/* Cutoff for the big loop is a size of 32 bytes since otherwise
	   the loop will never be entered.  The comparison is unsigned.  */
	cmpl	$32, %ecx
	jbe	L(1)

	/* Align the destination: %eax = (-DEST) & 3 is the number of
	   bytes (0..3) up to the next 4-byte boundary.  Subtract it from
	   the length, then swap so that %ecx holds the alignment count
	   for `rep movsb' and %eax holds the length that remains.  */
	negl	%eax
	andl	$3, %eax
	subl	%eax, %ecx
	xchgl	%eax, %ecx

	rep; movsb

	/* %ecx = remaining length, biased by -32.  Because up to 3 bytes
	   were consumed above, fewer than 32 may be left even though the
	   original length exceeded 32; in that case skip the loop.  The
	   test is on the sign bit, so a biased count with bit 31 set also
	   takes this branch and is copied entirely by the byte copy at
	   L(1).  */
	movl	%eax, %ecx
	subl	$32, %ecx
	js	L(2)

	/* Read ahead to make sure we write in the cache, since the i586
	   does not implement read-on-write-miss.  The loaded values are
	   discarded (both registers are overwritten below); only the
	   memory access matters.  (%edi) is touched once before the
	   loop, 28(%edi) at the top of every iteration.  */
	movl	(%edi), %eax
L(3):	movl	28(%edi), %edx

	/* Now correct the loop counter.  Please note that in the following
	   code the flags are not changed anymore: only movl and leal
	   follow, so the sign flag set here is the one tested by the
	   `jns' at the bottom of the loop.  */
	subl	$32, %ecx

	/* Copy 32 bytes as four groups of two dwords, alternating %eax
	   and %edx.  */
	movl	(%esi), %eax
	movl	4(%esi), %edx
	movl	%eax, (%edi)
	movl	%edx, 4(%edi)
	movl	8(%esi), %eax
	movl	12(%esi), %edx
	movl	%eax, 8(%edi)
	movl	%edx, 12(%edi)
	movl	16(%esi), %eax
	movl	20(%esi), %edx
	movl	%eax, 16(%edi)
	movl	%edx, 20(%edi)
	movl	24(%esi), %eax
	movl	28(%esi), %edx
	movl	%eax, 24(%edi)
	movl	%edx, 28(%edi)

	/* Advance both cursors with leal rather than addl so that the
	   flags from the subl above are preserved.  */
	leal	32(%esi), %esi
	leal	32(%edi), %edi

	/* Loop while the biased count is still non-negative, i.e. while
	   at least 32 more bytes remain.  */
	jns	L(3)

	/* Correct extra loop counter modification: undo the -32 bias so
	   that %ecx is the number of tail bytes (0..31 on loop exit).  */
L(2):	addl	$32, %ecx
#if !MEMPCPY_P
	/* memcpy returns DEST; %eax was used as scratch above, so reload
	   it from the stack.  */
	movl	DEST(%esp), %eax
#endif

	/* Copy the tail, or the whole buffer when LEN <= 32.  */
L(1):	rep; movsb

#if MEMPCPY_P
	/* mempcpy returns the address just past the last byte written,
	   which is where %edi points after the copy.  */
	movl	%edi, %eax
#endif

	/* Restore the saved registers in reverse push order, updating the
	   unwind information after each pop.  */
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	LEAVE
	RET_PTR
END (BP_SYM (memcpy))
#if !MEMPCPY_P
/* Emitted only for the memcpy build, not when this file is built as
   mempcpy.  libc_hidden_builtin_def is defined outside this file.  */
libc_hidden_builtin_def (memcpy)
#endif