;; -----------------------------------------------------------------------
;;
;;   Copyright 1994-2009 H. Peter Anvin - All Rights Reserved
;;   Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin
;;
;;   This program is free software; you can redistribute it and/or modify
;;   it under the terms of the GNU General Public License as published by
;;   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
;;   Boston MA 02111-1307, USA; either version 2 of the License, or
;;   (at your option) any later version; incorporated herein by reference.
;;
;; -----------------------------------------------------------------------

;;
;; bcopy32xx.inc
;;

;
; 32-bit bcopy routine
;
; This is the actual 32-bit portion of the bcopy and shuffle and boot
; routines.  ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the
; sole exception being the actual relocation code at the beginning of
; pm_shuffle_boot.
;
; It also really needs to live all in a single segment, for the
; address calculations to actually work.
;

        bits 32
        section .bcopyxx.text
        align 16
;
; pm_bcopy:
;
;       This is the protected-mode core of the "bcopy" routine.
;       Try to do aligned transfers; if the src and dst are relatively
;       misaligned, align the dst.
;
;       ECX is guaranteed to not be zero on entry.
;
;       Clobbers ESI, EDI, ECX.
;
;       Register use, as implemented below:
;         ESI = source address, or -1 to zero-fill the destination instead
;         EDI = destination address
;         ECX = byte count
;       EAX, EBX and EDX are used as scratch; they are pushed on entry and
;       popped again on exit.  The ascending paths never issue CLD, so they
;       rely on DF already being clear; the descending path sets DF and
;       clears it again before returning.
;
pm_bcopy:
        push ebx                        ; Scratch registers, restored at .exit
        push edx
        push eax

        cmp esi,-1                      ; A source of -1 requests a zero fill
        je .zero_fill

        cmp esi,edi                     ; Source below destination: the areas
        jb .check_overlap               ; may overlap, so look more closely

        ; ---- Ascending copy ----
.copy_up:
        mov edx,edi                     ; EDX = destination, for the bit tests
        shr edx,1                       ; CF = bit 0 of the destination
        jnc .up_even
        movsb                           ; Odd destination: move one byte first
        dec ecx
.up_even:
        mov al,cl                       ; Low count bits, in case we bail out
        cmp ecx,2
        jb .up_last_byte                ; Fewer than two bytes remain

        shr edx,1                       ; CF = bit 1 of the original destination
        jnc .up_bulk
        movsw                           ; Bit set: move one word before the bulk
        sub ecx,2
.up_bulk:
        mov al,cl                       ; Low two bits = bytes left after dwords
        shr ecx,2                       ; Byte count -> dword count
        rep movsd                       ; ECX == 0 afterwards

        test al,2                       ; Trailing word?
        jz .up_last_byte
        movsw
.up_last_byte:
        test al,1                       ; Trailing byte?
        jz .exit
        movsb
.exit:
        pop eax
        pop edx
        pop ebx
        ret

        ; ---- Source < destination: decide the copy direction ----
.check_overlap:
        lea eax,[esi+ecx-1]             ; EAX = address of the last source byte
        cmp edi,eax
        ja .copy_up                     ; Destination starts beyond the source:
                                        ; no overlap, ascending copy is fine

        std                             ; Overlap: copy from the top down
        lea edi,[edi+ecx-1]             ; EDI = last destination byte
        mov esi,eax                     ; ESI = last source byte

        mov edx,edi                     ; EDX = last destination address
        shr edx,1                       ; CF = its bit 0
        jc .down_odd
        movsb                           ; Even: move one byte first
        dec ecx
.down_odd:
        dec esi                         ; Point at the low byte of the
        dec edi                         ; topmost remaining word
        mov al,cl                       ; Low count bits, in case we bail out
        cmp ecx,2
        jb .down_last_byte              ; Fewer than two bytes remain

        shr edx,1                       ; CF = bit 1 of the last destination address
        jc .down_bulk
        movsw                           ; Bit clear: move one word before the bulk
        sub ecx,2
.down_bulk:
        sub esi,2                       ; Point at the low byte of the
        sub edi,2                       ; topmost remaining dword
        mov al,cl                       ; Low two bits = bytes left after dwords
        shr ecx,2                       ; Byte count -> dword count
        rep movsd

        add esi,2                       ; Back to word addressing
        add edi,2
        test al,2                       ; Trailing word?
        jz .down_last_byte
        movsw
.down_last_byte:
        inc esi                         ; Back to byte addressing
        inc edi
        test al,1                       ; Trailing byte?
        jz .down_done
        movsb
.down_done:
        cld                             ; Leave DF clear again
        jmp short .exit

        ; ---- Zero fill (ESI == -1) ----
.zero_fill:
        xor eax,eax                     ; Fill value; the count bits therefore
                                        ; live in BL instead of AL on this path

        mov edx,edi                     ; EDX = destination, for the bit tests
        shr edx,1                       ; CF = bit 0 of the destination
        jnc .zero_even
        stosb                           ; Odd destination: store one byte first
        dec ecx
.zero_even:
        mov bl,cl                       ; Low count bits, in case we bail out
        cmp ecx,2
        jb .zero_last_byte              ; Fewer than two bytes remain

        shr edx,1                       ; CF = bit 1 of the original destination
        jnc .zero_bulk
        stosw                           ; Bit set: store one word before the bulk
        sub ecx,2
.zero_bulk:
        mov bl,cl                       ; Low two bits = bytes left after dwords
        shr ecx,2                       ; Byte count -> dword count
        rep stosd

        test bl,2                       ; Trailing word?
        jz .zero_last_byte
        stosw
.zero_last_byte:
        test bl,1                       ; Trailing byte?
        jz .zero_done
        stosb
.zero_done:
        jmp short .exit

;
; shuffle_and_boot:
;
;       This routine is used to shuffle memory around, followed by
;       invoking an entry point somewhere in low memory.  This routine
;       can clobber any memory outside the bcopy special area.
;
;       IMPORTANT: This routine does not set up any registers.
;       It is the responsibility of the caller to generate an appropriate entry
;       stub; *especially* when going to real mode.
;
; Inputs:
;       ESI             -> Pointer to list of (dst, src, len) triples(*)
;       EDI             -> Pointer to safe area for list + shuffler
;                          (must not overlap this code nor the RM stack)
;       ECX             -> Byte count of list area (for initial copy)
;
;     If src == -1: then the memory pointed to by (dst, len) is bzeroed;
;                   this is handled inside the bcopy routine.
;
;     If len == 0:  this marks the end of the list; dst indicates
;                   the entry point and src the mode.  As implemented
;                   below, src == 0 selects the 16-bit entry path
;                   (pm_shuffle_16) and any nonzero src jumps straight
;                   to dst in 32-bit protected mode.
;
;     (*) dst, src, and len are four bytes each
;
; do_raw_shuffle_and_boot is the same entry point, but with a C ABI:
; do_raw_shuffle_and_boot(safearea, descriptors, bytecount)
; The three arguments are taken from EAX, EDX and ECX respectively.
;
        global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
        mov edi,eax                     ; safearea    -> EDI
        mov esi,edx                     ; descriptors -> ESI
                                        ; bytecount is already in ECX

pm_shuffle:
        cli                             ; End interrupt service (for good)
        mov ebx,edi                     ; EBX <- descriptor list (its copy in
                                        ; the safe area, made just below)
        lea edx,[edi+ecx+15]            ; EDX <- where to relocate our code to:
        and edx,~15                     ; end of the list copy, rounded up to
                                        ; 16 to benefit the GDT
        call pm_bcopy                   ; Copy the list; pm_bcopy pushes and
                                        ; pops EBX and EDX, so both survive

        mov esi,__bcopyxx_start         ; Absolute source address
        mov edi,edx                     ; Absolute target address
        sub edx,esi                     ; EDX <- address delta
        mov ecx,__bcopyxx_dwords
        lea eax,[edx+.safe]             ; Resume point inside the relocated copy
        ; Relocate this code
        rep movsd
        jmp eax                         ; Jump to safe location
.safe:
        ; Give ourselves a safe stack: bcopyxx_stack bytes above the
        ; relocated __bcopyxx_end
        lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
        add edx,bcopy_gdt               ; EDX <- new (relocated) GDT
        mov [edx+2],edx                 ; GDT self-pointer, stored in the
                                        ; null descriptor slot
        lgdt [edx]                      ; Switch to local GDT

        ; Now for the actual shuffling: walk the 12-byte descriptors
.loop:
        mov edi,[ebx]                   ; dst
        mov esi,[ebx+4]                 ; src (-1 = zero fill)
        mov ecx,[ebx+8]                 ; len (0 = end of list)
        add ebx,12
        jecxz .done
        call pm_bcopy
        jmp .loop
.done:
        ; Here EDI = entry point, ESI = entry mode and ECX = 0
        lidt [edx+RM_IDT_ptr-bcopy_gdt] ; RM-like IDT
        push ecx                        ; == 0, for cleaning the flags register
        test esi,esi                    ; Mode 0 -> 16-bit entry point
        jz pm_shuffle_16
        popfd                           ; Clean the flags
        jmp edi                         ; Protected mode entry

        ; We have a 16-bit entry point, so we need to return
        ; to 16-bit mode.  Note: EDX already points to the GDT.
;
pm_shuffle_16:
        ; Patch the entry point address (EDI) into the base fields of
        ; the 16-bit code and data descriptors.  EDX = GDT address.
        mov eax,edi
        mov [edx+PM_CS16+2],ax          ; Base bits 15:0
        mov [edx+PM_DS16+2],ax
        shr eax,16
        mov [edx+PM_CS16+4],al          ; Base bits 23:16
        mov [edx+PM_DS16+4],al
        mov [edx+PM_CS16+7],ah          ; Base bits 31:24
        mov [edx+PM_DS16+7],ah

        ; EAX <- CR0 with bit 0 (PE) cleared.  It is not written back
        ; to CR0 anywhere in this routine.
        ; NOTE(review): presumably the entry stub at PM_CS16:0 does the
        ; actual "mov cr0,eax" -- confirm against the stub generator.
        mov eax,cr0
        and al,~1
        popfd                           ; Clean the flags (pops the zero that
                                        ; was pushed before jumping here)
        ; No flag-changing instructions below...
        mov dx,PM_DS16                  ; Only the low 16 bits of EDX matter
        mov ds,edx                      ; for the segment register loads
        mov es,edx
        mov fs,edx
        mov gs,edx
        mov ss,edx
        jmp PM_CS16:0                   ; Offset 0 of the segment whose base
                                        ; was just set to the entry point

        section .bcopyxx.data

        alignz 16
; GDT descriptor entry: defines the label bcopy_gdt.<name> and the
; selector constant PM_<name> (offset of the entry within the GDT)
%macro desc 1
bcopy_gdt.%1:
PM_%1   equ bcopy_gdt.%1-bcopy_gdt
%endmacro

bcopy_gdt:
        dw bcopy_gdt_size-1             ; Null descriptor - contains GDT
        dd bcopy_gdt                    ; pointer for LGDT instruction
        dw 0

        ; TSS segment to keep Intel VT happy.  Intel VT is
        ; unhappy about anything that doesn't smell like a
        ; full-blown 32-bit OS.
        desc TSS
        dw 104-1, DummyTSS              ; 08h 32-bit task state segment
        dd 0x00008900                   ; present, dpl 0, 104 bytes @DummyTSS

        desc CS16
        dd 0x0000ffff                   ; 10h Code segment, use16, readable,
        dd 0x00009b00                   ; present, dpl 0, cover 64K
        desc DS16
        dd 0x0000ffff                   ; 18h Data segment, use16, read/write,
        dd 0x00009300                   ; present, dpl 0, cover 64K
        desc CS32
        dd 0x0000ffff                   ; 20h Code segment, use32, readable,
        dd 0x00cf9b00                   ; present, dpl 0, cover all 4G
        desc DS32
        dd 0x0000ffff                   ; 28h Data segment, use32, read/write,
        dd 0x00cf9300                   ; present, dpl 0, cover all 4G

bcopy_gdt_size: equ $-bcopy_gdt
;
; Space for a dummy task state segment.  It should never be actually
; accessed, but just in case it is, point to a chunk of memory that
; has a chance to not be used for anything real...
;
DummyTSS        equ 0x580

        align 4
RM_IDT_ptr:
        dw 0xffff                       ; Length (nonsense, but matches CPU)
        dd 0                            ; Offset

bcopyxx_stack   equ 128                 ; We want this much stack

        ; Size of the shuffler, published as a dword for other code to read
        section .rodata
        global __syslinux_shuffler_size
        extern __bcopyxx_len
        align 4
__syslinux_shuffler_size:
        dd __bcopyxx_len

        bits 16
        section .text16