1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
|
;; -----------------------------------------------------------------------
;;
;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
;; the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;; Boston MA 02111-1307, USA; either version 2 of the License, or
;; (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------
;;
;; bcopy32xx.inc
;;
;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle-and-boot
; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;
; Everything from bcopyxx_start to bcopyxx_end is 32-bit code kept in its
; own section, so that it forms one contiguous unit.
bits 32
section .bcopyxx
align 16
; First byte of the relocatable region; pm_shuffle copies bcopyxx_dwords
; dwords starting here.
bcopyxx_start equ $
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine.
;	Try to do aligned transfers; if the src and dst are relatively
;	misaligned, align the dst.
;
;	Inputs:
;	  ESI -> source, or -1 to zero-fill the destination instead
;	  EDI -> destination
;	  ECX  = byte count; guaranteed to not be zero on entry
;	         (a zero count with a misaligned destination would wrap)
;
;	If source < destination the copy runs backwards from the last
;	byte, so overlapping regions are handled.
;
;	ESI, EDI, ECX, EAX and the flags are not preserved.
;	EBX is saved and restored; EDX is not touched.
;	The forward and zero-fill paths do not issue CLD themselves, so
;	they presumably rely on DF being clear on entry; the reverse
;	path clears DF again before returning.
;
pm_bcopy:
		push ebx		; BL is scratch in the zero-fill path

		cmp esi,-1
		je .bzero

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment.  Each step tests the live EDI, so the
		; bit-1 test sees the pointer after any leading byte.
		test edi,1
		jz .faa1
		a32 movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we go to .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 bytes left

		test edi,2
		jz .faa2
		a32 movsw
		sub ecx,2
.faa2:
		; Bulk transfer; EDI is dword-aligned here
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		a32 rep movsd		; Do our business
		; At this point ecx == 0

		test al,2
		jz .fab2
		a32 movsw
.fab2:
.f_tiny:
		test al,1
		jz .fab1
		a32 movsb
.fab1:
.done:
		pop ebx
		ret

.reverse:
		std			; Reverse copy
		lea esi,[esi+ecx-1]	; Point to final byte
		lea edi,[edi+ecx-1]

		; Initial alignment.  With DF set a dword transfer covers
		; [EDI, EDI+3], so it is aligned when the last byte still to
		; be copied sits at an address == 3 (mod 4).  Peel a byte if
		; that address is even, then a word if bit 1 is still clear.
		test edi,1
		jnz .raa1
		a32 movsb
		dec ecx
.raa1:
		dec esi			; Byte pointer -> word pointer
		dec edi
		mov al,cl		; Low bits, in case we go to .r_tiny
		cmp ecx,2
		jb .r_tiny		; 0 or 1 bytes left

		test edi,2		; EDI is even here; == 2 (mod 4) means
		jnz .raa2		; EDI-2 is already dword-aligned
		a32 movsw
		sub ecx,2
.raa2:
		; Bulk copy
		sub esi,2		; Word pointer -> dword pointer
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		a32 rep movsd

		; Final alignment
.r_final:
		add esi,2		; Dword pointer -> word pointer
		add edi,2
		test al,2
		jz .rab2
		a32 movsw
.rab2:
.r_tiny:
		inc esi			; Word pointer -> byte pointer
		inc edi
		test al,1
		jz .rab1
		a32 movsb
.rab1:
		cld			; Leave DF clear for the caller
		jmp short .done

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment, same scheme as .forward
		test edi,1
		jz .zaa1
		a32 stosb
		dec ecx
.zaa1:
		mov bl,cl		; Low bits, in case we go to .z_tiny
		cmp ecx,2
		jb .z_tiny		; 0 or 1 bytes left

		test edi,2
		jz .zaa2
		a32 stosw
		sub ecx,2
.zaa2:
		; Bulk; EDI is dword-aligned here
		mov bl,cl		; Save low bits
		shr ecx,2
		a32 rep stosd

		test bl,2
		jz .zab2
		a32 stosw
.zab2:
.z_tiny:
		test bl,1
		jz .zab1
		a32 stosb
.zab1:
		jmp short .done
;
; shuffle_and_boot:
;
; (The entry label for this code is pm_shuffle.)
;
; This routine is used to shuffle memory around, followed by
; invoking an entry point somewhere in low memory. This routine
; can clobber any memory outside the bcopy special area.
;
; IMPORTANT: This routine does not set up any registers.
; It is the responsibility of the caller to generate an appropriate entry
; stub; *especially* when going to real mode.
;
; Inputs:
; ESI -> Pointer to list of (dst, src, len) triples, 3 dwords each
; EDI -> Pointer to safe area for list + shuffler
; (must not overlap this code nor the RM stack)
; ECX -> Byte count of list area (for initial copy)
;
; If src == -1: then the memory pointed to by (dst, len) is bzeroed;
; this is handled inside the bcopy routine.
;
; If len == 0: this marks the end of the list; dst indicates
; the entry point and src the mode. The code below takes the
; real-mode exit when src == 0 and jumps to dst in protected mode
; when src != 0.
; NOTE(review): this comment used to read "(0 = pm, 1 = rm)", which is
; the opposite of what the code does -- confirm against the callers.
;
; This routine does not return: it ends by jumping to the entry point.
;
pm_shuffle:
mov ebx,edi ; EBX <- descriptor list (at its copied location)
lea edx,[edi+ecx] ; EDX <- end of the copied list = where this code goes
push edx
; Copy the descriptor list into the safe area
call pm_bcopy
pop edx
mov edi,edx
; Absolute (link-time) address of this code: the one deliberately
; non-position-independent reference
mov esi,bcopyxx_start
mov ecx,bcopyxx_dwords
lea eax,[edx+.safe-bcopyxx_start] ; Resume point
; Relocate this code
rep movsd
jmp eax ; Jump to safe location
.safe:
; From here on we execute from the relocated copy; EDX is its base.
; Give ourselves a safe stack
; ESP <- bcopyxx_stack bytes past the end of the relocated code
lea esp,[edx+bcopyxx_stack+bcopyxx_end-bcopyxx_start]
add edx,bcopy_gdt-bcopyxx_start ; EDX <- relocated bcopy_gdt
mov [edx+2],edx ; GDT self-pointer
lgdt [edx] ; Switch to local GDT
; Now for the actual shuffling...
.loop:
mov edi,[ebx] ; dst
mov esi,[ebx+4] ; src (-1 = zero-fill)
mov ecx,[ebx+8] ; len
jecxz .done ; len == 0 terminates the list
call pm_bcopy
add ebx,12 ; Next descriptor
jmp .loop
.done:
; Terminator entry: EDI = entry point, ESI = mode
and esi,esi ; ZF set iff the mode field is zero
jz pm_shuffle_real_mode
jmp edi ; Protected mode entry
; We have a real-mode entry point, so we need to return
; to real mode...
;
; On entry EDI holds the dst field of the terminating list entry. It is
; stored unchanged into the 4-byte pointer operand of the final far jump
; (.rm_entry), i.e. offset in the low word and segment in the high word.
; The code finds its own run-time address with call/pop, since it runs
; from a relocated copy.
pm_shuffle_real_mode:
call .here
.here: pop eax
; EAX = EBX = run-time address of .here
mov ebx,eax
add eax,.next-.here ; EAX <- run-time address of .next
mov [ebx-.here+.rm_entry],edi ; Patch the target of the final far jump
; Patch the base of the CS16 descriptor to the address of .next,
; so that PM_CS16:0 lands on .next
mov [ebx-.here+bcopy_gdt.CS16+2],ax ; Base bits 0-15
shr eax,16
mov [ebx-.here+bcopy_gdt.CS16+4],al ; Base bits 16-23
mov [ebx-.here+bcopy_gdt.CS16+7],ah ; Base bits 24-31
; Load every data segment register with the 16-bit, 64K-limit
; data descriptor before leaving protected mode
mov eax,PM_DS16_RM
mov ds,eax
mov es,eax
mov fs,eax
mov gs,eax
mov ss,eax
jmp PM_CS16:0 ; Switch to 16-bit code segment; continues at .next
bits 16
.next:
mov eax,cr0
and al,~1 ; Clear bit 0 of CR0 (protection enable)
mov cr0,eax
jmp 0:0 ; Operand is overwritten at run time with EDI
; Address of the 4-byte pointer operand of the jump just above
.rm_entry equ $-4
bits 32
align 16
; GDT descriptor entry
; "desc NAME" defines the label bcopy_gdt.NAME at the current position
; and the constant PM_NAME as its byte offset from bcopy_gdt, which is
; the value loaded into segment registers to select it.
%macro desc 1
bcopy_gdt.%1:
PM_%1 equ bcopy_gdt.%1-bcopy_gdt
%endmacro
; The 8 bytes of the null descriptor slot double as the operand for
; LGDT: a 16-bit limit followed by a 32-bit base. pm_shuffle overwrites
; the base field with the run-time address after relocating this table.
bcopy_gdt:
dw bcopy_gdt_size-1 ; Null descriptor - contains GDT
dd bcopy_gdt ; pointer for LGDT instruction
dw 0
; The base of CS16 is patched at run time by pm_shuffle_real_mode
desc CS16
dd 0000ffffh ; 08h Code segment, use16, readable,
dd 00009b00h ; present, dpl 0, cover 64K
desc DS16_4G
dd 0000ffffh ; 10h Data segment, use16, read/write,
dd 008f9300h ; present, dpl 0, cover all 4G
desc DS16_RM
dd 0000ffffh ; 18h Data segment, use16, read/write,
dd 00009300h ; present, dpl 0, cover 64K
desc CS32
dd 0000ffffh ; 20h Code segment, use32, readable,
dd 00cf9b00h ; present, dpl 0, cover all 4G
desc DS32
dd 0000ffffh ; 28h Data segment, use32, read/write,
dd 00cf9300h ; present, dpl 0, cover all 4G
; TSS segment to keep Intel VT happy. Intel VT is
; unhappy about anything that doesn't smell like a
; full-blown 32-bit OS.
desc TSS
dw 104-1, DummyTSS ; 30h 32-bit task state segment
dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS
; 16-bit stack segment, which may have a different
; base from DS16 (e.g. if we're booted from PXELINUX)
desc SS16
dd 0000ffffh ; 38h Data segment, use16, read/write,
dd 00009300h ; present, dpl 0, cover 64K
; Size of the table in bytes; the limit word at bcopy_gdt is this minus 1
bcopy_gdt_size: equ $-bcopy_gdt
; Pad with zero bytes so the region length is a whole number of dwords;
; pm_shuffle relocates it with rep movsd using bcopyxx_dwords.
align 4, db 0
bcopyxx_end equ $ ; *Must* be dword-aligned!
bcopyxx_len equ $-bcopyxx_start ; Length of the relocatable region in bytes
bcopyxx_dwords equ bcopyxx_len >> 2 ; Same length in dwords
bcopyxx_stack equ 128 ; We want this much stack
; Code plus stack: the space pm_shuffle uses after the copied list.
; Not referenced in this part of the file; presumably used by callers
; to size the safe area -- confirm.
bcopyxx_safe equ bcopyxx_len + bcopyxx_stack
;
; Space for a dummy task state segment. It should never be actually
; accessed, but just in case it is, point to a chunk of memory not used
; for anything real.
;
DummyTSS equ 0x800
; Switch back to 16-bit code in .text for whatever follows
bits 16
section .text
|