summaryrefslogtreecommitdiff
path: root/core/bcopy32.inc
blob: c4edc715f25e00ba052b70238a1557357de25a5f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32.inc
;;
;; 32-bit bcopy routine for real mode
;;

;
; 32-bit bcopy routine for real mode
;
; We enter protected mode, set up a flat 32-bit environment, run rep movsd
; and then exit.  IMPORTANT: This code assumes cs == 0.
;
; This code is probably excessively anal-retentive in its handling of
; segments, but this stuff is painful enough as it is without having to rely
; on everything happening "as it ought to."
;

		bits 16			; Everything up to simple_pm_call.in_pm is 16-bit code
		section .text

;
; bcopy:
;	32-bit copy, overlap safe
;
; Inputs:
;	ESI	- source pointer (-1 means do bzero rather than bcopy)
;	EDI	- target pointer
;	ECX	- byte count
;	DF	- zero
;
; Outputs:
;	ESI	- first byte after source (garbage if ESI == -1 on entry)
;	EDI	- first byte after target
;
bcopy:		jecxz .ret		; Zero byte count: return with ESI/EDI untouched
		pushad			; Keep the caller's registers across the PM call
		push word pm_bcopy	; PM entrypoint argument; simple_pm_call pops it (ret 2)
					; NOTE(review): pm_bcopy is not defined in this file,
					; presumably it comes from bcopyxx.inc
		call simple_pm_call
		popad			; Back to the entry values of ESI, EDI and ECX
		add edi,ecx		; EDI -> first byte after target
		add esi,ecx		; ESI -> first byte after source (garbage for ESI == -1)
.ret:		ret

;
; shuffle_and_boot_raw:
;	The new version of shuffle and boot.  Enters protected mode through
;	simple_pm_call with pm_shuffle as the entrypoint; ESI, EDI and ECX
;	are passed through to that routine unchanged.
; Inputs:
;	ESI		-> Pointer to list of (dst, src, len) descriptors
;	EDI		-> Pointer to safe area for list + shuffler
;			   (must not overlap this code nor the RM stack)
;	ECX		-> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;		    this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;		    the entry point and src the mode (0 = pm, 1 = rm)
;
;     NOTE(review): the descriptor field widths are not visible in this
;     file; see the pm_shuffle implementation.
;
shuffle_and_boot_raw:
		push word pm_shuffle	; PM entrypoint argument for simple_pm_call
		call simple_pm_call
		; Never returns...
		jmp kaboom		; ...so getting here is treated as fatal

;
; This routine is used to invoke a simple routine in 32-bit protected
; mode (with 32-bit zero-based CS, DS, ES, and SS, with ESP pointing to the
; real-mode stack even if the real-mode stack was in a nonzero SS.)
;
; No interrupt thunking services are provided; interrupts are disabled
; for the duration of the routine.  Don't run for too long at a time
; unless you really mean it.
;
; Inputs:
;	On stack	- pm entrypoint (IP only)
;	EAX, EBP preserved until real-mode exit
;	EBX, ECX, EDX, ESI and EDI passed to the called routine
;
; Outputs:
;	EAX, EBP restored from real-mode entry
;	All other registers as returned from called function
;	PM entrypoint cleaned off stack
;
simple_pm_call:
		; Stack layout relative to BP once the frame pointer is set:
		;   [bp+10] pm entrypoint (word)	[bp+8] return IP (word)
		;   [bp+4]  saved EAX		[bp+0] saved EBP
		push eax
		push ebp
		movzx ebp,sp		; BP is used as frame pointer
		pushfd			; Saves, among others, the IF flag
		push ds
		push es
		push fs
		push gs

		cli			; No interrupts from here until the popfd below
		call enable_a20

		; Clear the busy bit in the TSS descriptor so that the ltr
		; below can load it again on every call.
		mov byte [cs:bcopy_gdt.TSS+5],89h	; Mark TSS unbusy

		; Convert the stack segment to a base
		xor eax,eax
		mov ax,ss
		shl eax,4		; Real-mode segment * 16 = linear base
		add ebp,eax		; EBP is now an absolute frame ptr

		; Save the old segmented stack pointer
		; (reloaded as one SS:ESP pair by the lss at .in_rm)
		mov [cs:.rm_esp],esp
		mov [cs:.rm_ss],ss

		o32 lgdt [cs:bcopy_gdt]
		mov eax,cr0
		or al,1			; Set CR0.PE
		mov cr0,eax		; Enter protected mode
		jmp PM_CS32:.in_pm	; Far jump loads the 32-bit code segment

		bits 32
.in_pm:
		mov ax,PM_DS32		; Only the low 16 bits of EAX matter for
		mov ss,eax		; the segment register loads below
		; The real-mode saves (pushfd + four segment registers =
		; 12 bytes) live at [ebp-12, ebp); starting the flat stack
		; 40 bytes below EBP keeps them from being overwritten.
		lea esp,[ebp-8*4-2*4]	; Flat mode stack
		mov es,eax
		mov ds,eax

		; Set fs, gs, tr, and ldtr in case we're on a virtual
		; machine running on Intel VT hardware -- it can't
		; deal with a partial transition, for no good reason.

		; Writing only AL relies on AH still being zero from the
		; PM_DS32 load, i.e. on these selectors being below 100h.
		mov al,PM_DS16		; Real-mode-like segment
		mov fs,eax
		mov gs,eax
		mov al,PM_TSS		; Intel VT really doesn't want
		ltr ax			; an invalid TR and LDTR, so give
		xor eax,eax		; it something that it can use...
		lldt ax			; (sigh)

		; Fetch the 16-bit pm entrypoint pushed by the caller,
		; at frame offset +10 (see the layout above).
		movzx eax,word [ebp+2*4+2]
		call eax		; Call actual routine

		jmp PM_CS16:.exit	; Drop to a 16-bit code segment first
		bits 16
.exit:
		mov ax,PM_DS16		; "Real-mode-like" data segment
		mov es,eax
		mov ds,eax
		mov ss,eax

		mov eax,cr0
		and al,~1		; Clear CR0.PE
		mov cr0,eax		; Disable protected mode
		jmp 0:.in_rm		; Far jump reloads CS = 0 (this file assumes cs == 0)

.in_rm:		; Back in real mode
		; Loads ESP from .rm_esp and SS from the word that follows it
		lss esp,[cs:.rm_esp]	; Restore the stack
		pop gs
		pop fs
		pop es
		pop ds

		popfd			; Restores IF as it was on entry
		pop ebp
		pop eax
		ret 2			; Drops the pm entry

		section .bss
		alignb 4
		; Keep these two adjacent and in this order: the lss above
		; reads them as a single 6-byte offset:segment pair.
.rm_esp		resd 1
.rm_ss		resw 1


		section .text
;
; Routines to enable and disable (yuck) A20.  These routines are gathered
; from tips from a couple of sources, including the Linux kernel and
; http://www.x86.org/.  The need for the delay to be as large as given here
; is indicated by Donnie Barnes of RedHat, the problematic system being an
; IBM ThinkPad 760EL.
;
; We typically toggle A20 twice for every 64K transferred.
;
%define	io_delay	call _io_delay	; Use as a pseudo-instruction: "io_delay"
%define IO_DELAY_PORT	80h		; Invalid port (we hope!)
%define disable_wait	32		; How long to wait for a disable
					; NOTE(review): not referenced in this file

; A20 method identifiers.  Note the skip of 2 here.
; NOTE(review): these are not referenced in this file, which tracks the
; current method through A20Ptr instead -- confirm whether other files
; still use them before removing.
%define A20_DUNNO	0		; A20 type unknown
%define A20_NONE	2		; A20 always on?
%define A20_BIOS	4		; A20 BIOS enable
%define A20_KBC		6		; A20 through KBC
%define A20_FAST	8		; A20 through port 92h

; slow_out:  write AL to port DX, then delay.
; _io_delay: delay by writing AL twice to IO_DELAY_PORT; normally invoked
;            through the io_delay macro.
; Neither routine changes any register; the out instruction leaves the
; flags alone, so callers may rely on flags surviving an io_delay.
slow_out:	out dx, al		; Fall through

_io_delay:	out IO_DELAY_PORT,al
		out IO_DELAY_PORT,al
		ret

		section .data
		align 2
; Address of the A20 method to start from on the next enable_a20 call;
; enable_a20 jumps through it and overwrites it as it tries each method.
A20Ptr		dw a20_dunno

		section .bss
A20Test		resw 1			; Counter for testing A20 status
					; (a20_test compares it with its 1 MB alias)
A20Tries	resb 1			; Times until giving up on A20

		section .text
;
; enable_a20:
;	Make sure the A20 address line is enabled, trying in turn:
;	"already on", BIOS INT 15h, the keyboard controller, and port 92h.
;	The method to start from is remembered in A20Ptr across calls.
;	All general registers are preserved (pushad/popad).  On repeated
;	failure, jumps to abort_load with SI -> err_a20.
;
enable_a20:
		pushad				; Undone at a20_done
		mov byte [cs:A20Tries],255 ; Times to try to make this work

try_enable_a20:
;
; Flush the caches
;
%if DO_WBINVD
		call try_wbinvd
%endif

;
; If the A20 type is known, jump straight to type
;
		jmp word [cs:A20Ptr]

;
; First, see if we are on a system with no A20 gate
;
a20_dunno:
a20_none:
		mov word [cs:A20Ptr], a20_none
		call a20_test			; ZF = 0 means A20 is on
		jnz a20_done

;
; Next, try the BIOS (INT 15h AX=2401h)
;
a20_bios:
		mov word [cs:A20Ptr], a20_bios
		mov ax,2401h
		pushf				; Some BIOSes muck with IF
		int 15h
		popf

		call a20_test
		jnz a20_done

;
; Enable the keyboard controller A20 gate
;
a20_kbc:
		mov dl, 1			; Allow early exit
		call empty_8042			; Returns ZF = 0 if A20 came on meanwhile
		jnz a20_done			; A20 live, no need to use KBC

		mov word [cs:A20Ptr], a20_kbc	; Starting KBC command sequence

		mov al,0D1h			; Write output port
		out 064h, al
		call empty_8042_uncond

		mov al,0DFh			; A20 on
		out 060h, al
		call empty_8042_uncond

		; Apparently the UHCI spec assumes that A20 toggle
		; ends with a null command (assumed to be for synchronization?)
		; Put it here to see if it helps anything...
		mov al,0FFh			; Null command
		out 064h, al
		call empty_8042_uncond

		; Verify that A20 actually is enabled.  Do that by
		; observing a word in low memory and the same word in
		; the HMA until they are no longer coherent.  Note that
		; we don't do the same check in the disable case, because
		; we don't want to *require* A20 masking (SYSLINUX should
		; work fine without it, if the BIOS does.)
.kbc_wait:	push cx
		xor cx,cx			; CX = 0: loop runs 65536 times
.kbc_wait_loop:
		call a20_test
		jnz a20_done_pop		; A20 on; CX is still on the stack
		loop .kbc_wait_loop

		pop cx
;
; Running out of options here.  Final attempt: enable the "fast A20 gate"
;
a20_fast:
		mov word [cs:A20Ptr], a20_fast
		in al, 092h
		or al,02h			; Bit 1 = A20 enable
		and al,~01h			; Don't accidentally reset the machine!
		out 092h, al

.fast_wait:	push cx
		xor cx,cx			; CX = 0: loop runs 65536 times
.fast_wait_loop:
		call a20_test
		jnz a20_done_pop		; A20 on; CX is still on the stack
		loop .fast_wait_loop

		pop cx

;
; Oh bugger.  A20 is not responding.  Try frobbing it again; eventually give up
; and report failure to the user.
;
		dec byte [cs:A20Tries]
		jnz a20_dunno		; Did we get the wrong type?

		; The pushad frame is still on the stack at this point.
		; NOTE(review): presumably abort_load never returns -- confirm.
		mov si, err_a20
		jmp abort_load

		section .data
err_a20		db CR, LF, 'A20 gate not responding!', CR, LF, 0
		section .text

;
; A20 unmasked, proceed...
;
a20_done_pop:	pop cx			; Drop the wait-loop save of CX
a20_done:	popad
		ret

;
; a20_test: report whether the A20 address line is enabled.
;
; A counter word (A20Test) is bumped and written through CS, then compared
; with the word at 0FFFFh:(A20Test+10h), which lies exactly 1 MB higher.
; With A20 masked that address wraps around onto the counter itself, so
; the two always agree; with A20 live they are different locations.
; (This relies on the file-wide assumption that cs == 0.)
;
; Outputs:
;	ZF = 0	- the words differed: A20 is enabled
;	ZF = 1	- the words matched on all 32 attempts: A20 looks disabled
;
; Every register is preserved; only the flags and A20Test change.
;
a20_test:
		push ax
		push cx
		push es
		mov ax,[cs:A20Test]	; Resume from the previous counter value
		mov cx,0FFFFh
		mov es,cx		; ES = 0FFFFh, the HMA segment
		mov cx,32		; Number of attempts
.probe:		inc ax
		mov [cs:A20Test],ax	; New value into the low-memory word
		io_delay		; Serialize, and give the write time to land
		cmp ax,[es:A20Test+10h]	; Equal => still looks wrapped
		loopz .probe		; Retry while equal and attempts remain
		pop es			; Pops leave ZF from the cmp intact
		pop cx
		pop ax
		ret

;
; Routine to empty the 8042 KBC controller.  If dl != 0
; then we will test A20 in the loop and exit if A20 is
; suddenly enabled.
;
; Entry points:
;	empty_8042_uncond - clears DL first, so there is no early exit
;	empty_8042	  - honours DL as passed in
;
; Outputs:
;	ZF = 0	- left early because A20 is on (only possible if DL != 0)
;	ZF = 1	- controller drained, or we gave up waiting for it
;
; Clobbers AL and flags (and DL for empty_8042_uncond); all other
; registers are preserved.
;
; The wait is bounded so that a missing or wedged controller cannot hang
; the machine: a status port that reads 0FFh KBC_MAX_NONE times in a row
; is taken to mean "no KBC here", and in any case we stop after 65536
; polls.  Either way the caller carries on and enable_a20 can reach its
; "A20 gate not responding!" report instead of spinning forever.
;
%define KBC_STATUS_NONE	0FFh		; Status port value when nothing responds
%define KBC_MAX_NONE	32		; Consecutive all-ones reads before giving up

empty_8042_uncond:
		xor dl,dl
empty_8042:
		push cx
		push bx
		xor cx,cx		; CX = 0: "loop" allows 65536 polls
		mov bl,KBC_MAX_NONE
.poll:
		call a20_test
		jz .read_status		; A20 still off: keep draining
		and dl,dl
		jnz .done		; Early exit requested; ZF = 0 here
.read_status:	io_delay
		in al, 064h		; Status port
		cmp al,KBC_STATUS_NONE
		jne .present
		dec bl
		jz .idle		; No controller responding; ZF = 1 here
		jmp short .next
.present:
		mov bl,KBC_MAX_NONE	; A real status byte: restart the count
		test al,1
		jz .no_output
		io_delay
		in al, 060h		; Read input
		jmp short .next
.no_output:
		test al,2
		jz .idle		; Input buffer empty: done, ZF = 1
.next:		loop .poll		; loop leaves the flags alone
		cmp al,al		; Poll budget exhausted: force ZF = 1
.idle:		io_delay		; out/call/ret/pop all preserve ZF
.done:		pop bx
		pop cx
		ret

;
; Execute a WBINVD instruction.  Only assembled when DO_WBINVD is
; nonzero; enable_a20 calls it under the same condition.
; NOTE(review): the old wording said "if possible on this CPU", but no
; CPU capability check is made here -- the instruction is executed
; unconditionally.  Confirm that every supported target implements it.
;
%if DO_WBINVD
try_wbinvd:
		wbinvd
		ret
%endif

;
; The 32-bit copy and shuffle code is "special", so it is in its own file
;
%include "bcopyxx.inc"