[e16e8f2] | 1 | ;; ----------------------------------------------------------------------- |
---|
| 2 | ;; |
---|
| 3 | ;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved |
---|
| 4 | ;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin |
---|
| 5 | ;; |
---|
| 6 | ;; This program is free software; you can redistribute it and/or modify |
---|
| 7 | ;; it under the terms of the GNU General Public License as published by |
---|
| 8 | ;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
---|
| 9 | ;; Boston MA 02111-1307, USA; either version 2 of the License, or |
---|
| 10 | ;; (at your option) any later version; incorporated herein by reference. |
---|
| 11 | ;; |
---|
| 12 | ;; ----------------------------------------------------------------------- |
---|
| 13 | |
---|
| 14 | ;; |
---|
| 15 | ;; bcopy32xx.inc |
---|
| 16 | ;; |
---|
| 17 | |
---|
| 18 | |
---|
| 19 | ; |
---|
| 20 | ; 32-bit bcopy routine |
---|
| 21 | ; |
---|
| 22 | ; This is the actual 32-bit portion of the bcopy and shuffle and boot |
---|
| 23 | ; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the |
---|
| 24 | ; sole exception being the actual relocation code at the beginning of |
---|
| 25 | ; pm_shuffle. |
---|
| 26 | ; |
---|
| 27 | ; It also really needs to live all in a single segment, for the |
---|
| 28 | ; address calculations to actually work. |
---|
| 29 | ; |
---|
| 30 | |
---|
| 31 | bits 32 |
---|
| 32 | section .bcopyxx.text |
---|
| 33 | align 16 |
---|
| 34 | ; |
---|
| 35 | ; pm_bcopy: |
---|
| 36 | ; |
---|
| 37 | ; This is the protected-mode core of the "bcopy" routine. |
---|
| 38 | ; Try to do aligned transfers; if the src and dst are relatively |
---|
| 39 | ; misaligned, align the dst. |
---|
| 40 | ; |
---|
| 41 | ; ECX is guaranteed to not be zero on entry. |
---|
| 42 | ; |
---|
| 43 | ; Clobbers ESI, EDI, ECX. |
---|
| 44 | ; |
---|
| 45 | |
---|
| 46 | pm_bcopy: |
---|
| 47 | push ebx |
---|
| 48 | push edx |
---|
| 49 | push eax |
---|
| 50 | |
---|
| 51 | cmp esi,-1 |
---|
| 52 | je .bzero |
---|
| 53 | |
---|
| 54 | cmp esi,edi ; If source < destination, we might |
---|
| 55 | jb .reverse ; have to copy backwards |
---|
| 56 | |
---|
| 57 | .forward: |
---|
| 58 | ; Initial alignment |
---|
| 59 | mov edx,edi |
---|
| 60 | shr edx,1 |
---|
| 61 | jnc .faa1 |
---|
| 62 | movsb |
---|
| 63 | dec ecx |
---|
| 64 | .faa1: |
---|
| 65 | mov al,cl |
---|
| 66 | cmp ecx,2 |
---|
| 67 | jb .f_tiny |
---|
| 68 | |
---|
| 69 | shr edx,1 |
---|
| 70 | jnc .faa2 |
---|
| 71 | movsw |
---|
| 72 | sub ecx,2 |
---|
| 73 | .faa2: |
---|
| 74 | |
---|
| 75 | ; Bulk transfer |
---|
| 76 | mov al,cl ; Save low bits |
---|
| 77 | shr ecx,2 ; Convert to dwords |
---|
| 78 | rep movsd ; Do our business |
---|
| 79 | ; At this point ecx == 0 |
---|
| 80 | |
---|
| 81 | test al,2 |
---|
| 82 | jz .fab2 |
---|
| 83 | movsw |
---|
| 84 | .fab2: |
---|
| 85 | .f_tiny: |
---|
| 86 | test al,1 |
---|
| 87 | jz .fab1 |
---|
| 88 | movsb |
---|
| 89 | .fab1: |
---|
| 90 | .done: |
---|
| 91 | pop eax |
---|
| 92 | pop edx |
---|
| 93 | pop ebx |
---|
| 94 | ret |
---|
| 95 | |
---|
| 96 | .reverse: |
---|
| 97 | lea eax,[esi+ecx-1] ; Point to final byte |
---|
| 98 | cmp edi,eax |
---|
| 99 | ja .forward ; No overlap, do forward copy |
---|
| 100 | |
---|
| 101 | std ; Reverse copy |
---|
| 102 | lea edi,[edi+ecx-1] |
---|
| 103 | mov esi,eax |
---|
| 104 | |
---|
| 105 | ; Initial alignment |
---|
| 106 | mov edx,edi |
---|
| 107 | shr edx,1 |
---|
| 108 | jc .raa1 |
---|
| 109 | movsb |
---|
| 110 | dec ecx |
---|
| 111 | .raa1: |
---|
| 112 | |
---|
| 113 | dec esi |
---|
| 114 | dec edi |
---|
| 115 | mov al,cl |
---|
| 116 | cmp ecx,2 |
---|
| 117 | jb .r_tiny |
---|
| 118 | shr edx,1 |
---|
| 119 | jc .raa2 |
---|
| 120 | movsw |
---|
| 121 | sub ecx,2 |
---|
| 122 | .raa2: |
---|
| 123 | |
---|
| 124 | ; Bulk copy |
---|
| 125 | sub esi,2 |
---|
| 126 | sub edi,2 |
---|
| 127 | mov al,cl ; Save low bits |
---|
| 128 | shr ecx,2 |
---|
| 129 | rep movsd |
---|
| 130 | |
---|
| 131 | ; Final alignment |
---|
| 132 | .r_final: |
---|
| 133 | add esi,2 |
---|
| 134 | add edi,2 |
---|
| 135 | test al,2 |
---|
| 136 | jz .rab2 |
---|
| 137 | movsw |
---|
| 138 | .rab2: |
---|
| 139 | .r_tiny: |
---|
| 140 | inc esi |
---|
| 141 | inc edi |
---|
| 142 | test al,1 |
---|
| 143 | jz .rab1 |
---|
| 144 | movsb |
---|
| 145 | .rab1: |
---|
| 146 | cld |
---|
| 147 | jmp short .done |
---|
| 148 | |
---|
| 149 | .bzero: |
---|
| 150 | xor eax,eax |
---|
| 151 | |
---|
| 152 | ; Initial alignment |
---|
| 153 | mov edx,edi |
---|
| 154 | shr edx,1 |
---|
| 155 | jnc .zaa1 |
---|
| 156 | stosb |
---|
| 157 | dec ecx |
---|
| 158 | .zaa1: |
---|
| 159 | |
---|
| 160 | mov bl,cl |
---|
| 161 | cmp ecx,2 |
---|
| 162 | jb .z_tiny |
---|
| 163 | shr edx,1 |
---|
| 164 | jnc .zaa2 |
---|
| 165 | stosw |
---|
| 166 | sub ecx,2 |
---|
| 167 | .zaa2: |
---|
| 168 | |
---|
| 169 | ; Bulk |
---|
| 170 | mov bl,cl ; Save low bits |
---|
| 171 | shr ecx,2 |
---|
| 172 | rep stosd |
---|
| 173 | |
---|
| 174 | test bl,2 |
---|
| 175 | jz .zab2 |
---|
| 176 | stosw |
---|
| 177 | .zab2: |
---|
| 178 | .z_tiny: |
---|
| 179 | test bl,1 |
---|
| 180 | jz .zab1 |
---|
| 181 | stosb |
---|
| 182 | .zab1: |
---|
| 183 | jmp short .done |
---|
| 184 | |
---|
| 185 | ; |
---|
| 186 | ; shuffle_and_boot: |
---|
| 187 | ; |
---|
| 188 | ; This routine is used to shuffle memory around, followed by |
---|
| 189 | ; invoking an entry point somewhere in low memory. This routine |
---|
| 190 | ; can clobber any memory outside the bcopy special area. |
---|
| 191 | ; |
---|
| 192 | ; IMPORTANT: This routine does not set up any registers. |
---|
| 193 | ; It is the responsibility of the caller to generate an appropriate entry |
---|
| 194 | ; stub; *especially* when going to real mode. |
---|
| 195 | ; |
---|
| 196 | ; Inputs: |
---|
| 197 | ; ESI -> Pointer to list of (dst, src, len) triples(*) |
---|
| 198 | ; EDI -> Pointer to safe area for list + shuffler |
---|
| 199 | ; (must not overlap this code nor the RM stack) |
---|
| 200 | ; ECX -> Byte count of list area (for initial copy) |
---|
| 201 | ; |
---|
| 202 | ; If src == -1: then the memory pointed to by (dst, len) is bzeroed; |
---|
| 203 | ; this is handled inside the bcopy routine. |
---|
| 204 | ; |
---|
| 205 | ; If len == 0: this marks the end of the list; dst indicates |
---|
| 206 | ; the entry point and src the mode (0 = rm, nonzero = pm) |
---|
| 207 | ; |
---|
| 208 | ; (*) dst, src, and len are four bytes each |
---|
| 209 | ; |
---|
| 210 | ; do_raw_shuffle_and_boot is the same entry point, but with a C ABI: |
---|
| 211 | ; do_raw_shuffle_and_boot(safearea, descriptors, bytecount) |
---|
| 212 | ; |
---|
;
; Register-argument entry point.  The first two arguments are taken from
; EAX and EDX and moved to where pm_shuffle expects them; ECX is passed
; through untouched.
;       EAX (safearea)    -> EDI
;       EDX (descriptors) -> ESI
;
        global  do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
        mov     edi,eax
        mov     esi,edx
        ; Fall through into pm_shuffle

pm_shuffle:
        cli                             ; End interrupt service (for good)
        mov     ebx,edi                 ; EBX -> descriptor list (its copy in
                                        ; the safe area, made just below)
        lea     edx,[edi+ecx+15]        ; EDX <- first 16-byte boundary at or
        and     edx,~15                 ; after the end of that copy; our code
                                        ; and GDT are relocated to there
        call    pm_bcopy                ; Copy the list: ECX bytes, ESI -> EDI

        ; Relocate this code.  Everything up to the JMP still runs from
        ; the original, absolute location.
        mov     esi,__bcopyxx_start     ; Absolute source address
        mov     edi,edx                 ; Absolute target address
        sub     edx,esi                 ; EDX <- address delta
        mov     ecx,__bcopyxx_dwords
        lea     eax,[edx+.relocated]    ; Address of .relocated in the copy
        rep movsd
        jmp     eax                     ; Continue in the relocated copy

.relocated:
        ; Stack pointer goes bcopyxx_stack bytes past the relocated
        ; __bcopyxx_end
        lea     esp,[edx+bcopyxx_stack+__bcopyxx_end]
        add     edx,bcopy_gdt           ; EDX <- relocated GDT
        mov     [edx+2],edx             ; Patch the base field of the LGDT
                                        ; operand kept in the null descriptor
        lgdt    [edx]                   ; Switch to the relocated GDT

        ; Walk the list of 12-byte (dst, src, len) entries
.next_entry:
        mov     edi,[ebx]               ; dst
        mov     esi,[ebx+4]             ; src
        mov     ecx,[ebx+8]             ; len
        add     ebx,12
        jecxz   .list_end               ; len == 0 terminates the list
        call    pm_bcopy
        jmp     .next_entry

.list_end:
        ; Here EDI = entry point and ESI = mode, from the terminating entry
        lidt    [edx+RM_IDT_ptr-bcopy_gdt]      ; RM-like IDT
        push    ecx                     ; ECX == 0: image for POPFD below
        and     esi,esi
        jz      pm_shuffle_16           ; Mode 0: leave via the 16-bit path
        popfd                           ; Clean the flags
        jmp     edi                     ; Protected mode entry

        ; We have a 16-bit entry point, so we need to return to 16-bit
        ; mode.  EDX still points to the GDT at this point.
pm_shuffle_16:
        ; Set the base of both 16-bit descriptors to the entry point:
        ; descriptor bytes 2-3 = base 15:0, byte 4 = base 23:16,
        ; byte 7 = base 31:24.
        mov     eax,edi
        mov     [edx+PM_CS16+2],ax
        mov     [edx+PM_DS16+2],ax
        shr     eax,16
        mov     [edx+PM_CS16+4],al
        mov     [edx+PM_CS16+7],ah
        mov     [edx+PM_DS16+4],al
        mov     [edx+PM_DS16+7],ah

        ; EAX <- CR0 with bit 0 (PE) cleared.  CR0 itself is not written
        ; here; presumably the code at the entry point does that with the
        ; value left in EAX -- confirm against the entry stub.
        mov     eax,cr0
        and     al,~1
        popfd                           ; Clean the flags
        ; No flag-changing instructions below...
        mov     dx,PM_DS16              ; Overwrites the low half of the GDT
                                        ; pointer; EDX is now only a selector
        mov     ds,edx
        mov     es,edx
        mov     fs,edx
        mov     gs,edx
        mov     ss,edx
        jmp     PM_CS16:0               ; Offset 0 of the segment just patched
                                        ; to start at the entry point
| 278 | |
---|
| 279 | section .bcopyxx.data |
---|
| 280 | |
---|
| 281 | alignz 16 |
---|
| 282 | ; GDT descriptor entry |
---|
| 283 | %macro desc 1 |
---|
| 284 | bcopy_gdt.%1: |
---|
| 285 | PM_%1 equ bcopy_gdt.%1-bcopy_gdt |
---|
| 286 | %endmacro |
---|
| 287 | |
---|
| 288 | bcopy_gdt: |
---|
| 289 | dw bcopy_gdt_size-1 ; Null descriptor - contains GDT |
---|
| 290 | dd bcopy_gdt ; pointer for LGDT instruction |
---|
| 291 | dw 0 |
---|
| 292 | |
---|
| 293 | ; TSS segment to keep Intel VT happy. Intel VT is |
---|
| 294 | ; unhappy about anything that doesn't smell like a |
---|
| 295 | ; full-blown 32-bit OS. |
---|
| 296 | desc TSS |
---|
| 297 | dw 104-1, DummyTSS ; 08h 32-bit task state segment |
---|
| 298 | dd 00008900h ; present, dpl 0, 104 bytes @DummyTSS |
---|
| 299 | |
---|
| 300 | desc CS16 |
---|
| 301 | dd 0000ffffh ; 10h Code segment, use16, readable, |
---|
| 302 | dd 00009b00h ; present, dpl 0, cover 64K |
---|
| 303 | desc DS16 |
---|
| 304 | dd 0000ffffh ; 18h Data segment, use16, read/write, |
---|
| 305 | dd 00009300h ; present, dpl 0, cover 64K |
---|
| 306 | desc CS32 |
---|
| 307 | dd 0000ffffh ; 20h Code segment, use32, readable, |
---|
| 308 | dd 00cf9b00h ; present, dpl 0, cover all 4G |
---|
| 309 | desc DS32 |
---|
| 310 | dd 0000ffffh ; 28h Data segment, use32, read/write, |
---|
| 311 | dd 00cf9300h ; present, dpl 0, cover all 4G |
---|
| 312 | |
---|
| 313 | bcopy_gdt_size: equ $-bcopy_gdt |
---|
| 314 | ; |
---|
| 315 | ; Space for a dummy task state segment. It should never be actually |
---|
| 316 | ; accessed, but just in case it is, point to a chunk of memory that |
---|
| 317 | ; has a chance to not be used for anything real... |
---|
| 318 | ; |
---|
| 319 | DummyTSS equ 0x580 |
---|
| 320 | |
---|
| 321 | align 4 |
---|
| 322 | RM_IDT_ptr: dw 0FFFFh ; Length (nonsense, but matches CPU) |
---|
| 323 | dd 0 ; Offset |
---|
| 324 | |
---|
| 325 | bcopyxx_stack equ 128 ; We want this much stack |
---|
| 326 | |
---|
;
; __syslinux_shuffler_size: exported dword holding the value of the
; external symbol __bcopyxx_len.
;
        section .rodata
        global  __syslinux_shuffler_size
        extern  __bcopyxx_len
        align   4
__syslinux_shuffler_size:
        dd      __bcopyxx_len

        ; Whatever follows this file is assembled as 16-bit code again
        bits    16
        section .text16