1 | ;; ----------------------------------------------------------------------- |
---|
2 | ;; |
---|
3 | ;; Copyright 1994-2009 H. Peter Anvin - All Rights Reserved |
---|
4 | ;; Copyright 2009-2010 Intel Corporation; author: H. Peter Anvin |
---|
5 | ;; |
---|
6 | ;; This program is free software; you can redistribute it and/or modify |
---|
7 | ;; it under the terms of the GNU General Public License as published by |
---|
8 | ;; the Free Software Foundation, Inc., 53 Temple Place Ste 330, |
---|
9 | ;; Boston MA 02111-1307, USA; either version 2 of the License, or |
---|
10 | ;; (at your option) any later version; incorporated herein by reference. |
---|
11 | ;; |
---|
12 | ;; ----------------------------------------------------------------------- |
---|
13 | |
---|
14 | ;; |
---|
15 | ;; bcopy32xx.inc |
---|
16 | ;; |
---|
17 | |
---|
18 | |
---|
19 | ; |
---|
20 | ; 32-bit bcopy routine |
---|
21 | ; |
---|
22 | ; This is the actual 32-bit portion of the bcopy and shuffle and boot |
---|
23 | ; routines. ALL THIS CODE NEEDS TO BE POSITION-INDEPENDENT, with the |
---|
24 | ; sole exception being the actual relocation code at the beginning of |
---|
25 | ; pm_shuffle. |
---|
26 | ; |
---|
27 | ; It also really needs to live all in a single segment, for the |
---|
28 | ; address calculations to actually work. |
---|
29 | ; |
---|
30 | |
---|
; ---------------------------------------------------------------------------
; 32-bit code for the bcopy/shuffle area starts here.
; ---------------------------------------------------------------------------
		bits 32
		section .bcopyxx.text
		align 16
;
; pm_bcopy:
;
;	This is the protected-mode core of the "bcopy" routine: an
;	overlap-safe memory move which can also zero-fill.
;
;	Inputs:
;	  ESI	source address, or -1 to zero-fill the destination
;	  EDI	destination address
;	  ECX	byte count; guaranteed to not be zero on entry
;
;	Strategy: bring the *destination* up to dword alignment (one
;	byte, then one word), move the bulk with REP MOVSD/STOSD, then
;	mop up the trailing word and byte.  If the src and dst are
;	relatively misaligned, it is the dst that gets aligned.
;
;	The alignment tests look at EDI as it is at the moment of the
;	test.  Testing bit 1 of the address EDI had on entry is wrong
;	once a byte has already been moved: an odd destination would
;	then never reach dword alignment for the bulk transfer.
;
;	The forward and zero-fill paths never execute CLD, so DF must
;	be clear on entry.  The reverse path sets DF and clears it again
;	before returning.
;
;	Clobbers ESI, EDI, ECX and the flags.
;	EAX and EBX are saved and restored; EDX is not touched.
;

pm_bcopy:
		push ebx
		push eax

		cmp esi,-1		; Source of -1 means "zero-fill"
		je near .bzero		; NEAR: .bzero is beyond rel8 range

		cmp esi,edi		; If source < destination, we might
		jb .reverse		; have to copy backwards

.forward:
		; Initial alignment: odd destination -> move one byte
		test edi,1
		jz .faa1
		movsb
		dec ecx
.faa1:
		mov al,cl		; Low bits, in case we bail to .f_tiny
		cmp ecx,2
		jb .f_tiny		; 0 or 1 bytes left

		; EDI is even here; move one word if it is not yet a
		; multiple of 4.  ECX >= 2, so the word is always ours.
		test edi,2
		jz .faa2
		movsw
		sub ecx,2
.faa2:

		; Bulk transfer
		mov al,cl		; Save low bits
		shr ecx,2		; Convert to dwords
		rep movsd		; Do our business
		; At this point ecx == 0

		test al,2		; Trailing word?
		jz .fab2
		movsw
.fab2:
.f_tiny:
		test al,1		; Trailing byte?
		jz .fab1
		movsb
.fab1:
.done:
		pop eax
		pop ebx
		ret

.reverse:
		lea eax,[esi+ecx-1]	; Point to final byte
		cmp edi,eax
		ja .forward		; No overlap, do forward copy

		std			; Reverse copy
		lea edi,[edi+ecx-1]	; EDI -> last destination byte
		mov esi,eax		; ESI -> last source byte

		; Initial alignment.  If the last byte sits at an even
		; address the end of the buffer is odd: move that byte.
		test edi,1
		jnz .raa1
		movsb
		dec ecx
.raa1:

		dec esi			; Byte pointers -> word pointers
		dec edi
		mov al,cl		; Low bits, in case we bail to .r_tiny
		cmp ecx,2
		jb .r_tiny		; 0 or 1 bytes left

		; EDI -> last remaining word.  If that word is at 4n+2
		; the remaining data already ends on a dword boundary;
		; otherwise move one word to get there.
		test edi,2
		jnz .raa2
		movsw
		sub ecx,2
.raa2:

		; Bulk copy
		sub esi,2		; Word pointers -> dword pointers
		sub edi,2
		mov al,cl		; Save low bits
		shr ecx,2
		rep movsd

		; Final alignment
.r_final:
		add esi,2		; Dword pointers -> word pointers
		add edi,2
		test al,2		; Leading word?
		jz .rab2
		movsw
.rab2:
.r_tiny:
		inc esi			; Word pointers -> byte pointers
		inc edi
		test al,1		; Leading byte?
		jz .rab1
		movsb
.rab1:
		cld			; Leave DF the way we found it
		jmp short .done

.bzero:
		xor eax,eax		; Fill value

		; Initial alignment: odd destination -> store one byte
		test edi,1
		jz .zaa1
		stosb
		dec ecx
.zaa1:

		mov bl,cl		; Low bits, in case we bail to .z_tiny
		cmp ecx,2
		jb .z_tiny		; 0 or 1 bytes left

		; EDI is even here; store one word if it is not yet a
		; multiple of 4.
		test edi,2
		jz .zaa2
		stosw
		sub ecx,2
.zaa2:

		; Bulk
		mov bl,cl		; Save low bits
		shr ecx,2
		rep stosd

		test bl,2		; Trailing word?
		jz .zab2
		stosw
.zab2:
.z_tiny:
		test bl,1		; Trailing byte?
		jz .zab1
		stosb
.zab1:
		jmp .done		; Not SHORT: .done is out of rel8 range
184 | |
---|
185 | ; |
---|
186 | ; shuffle_and_boot: |
---|
187 | ; |
---|
188 | ; This routine is used to shuffle memory around, followed by |
---|
189 | ; invoking an entry point somewhere in low memory. This routine |
---|
190 | ; can clobber any memory outside the bcopy special area. |
---|
191 | ; |
---|
192 | ; IMPORTANT: This routine does not set up any registers. |
---|
193 | ; It is the responsibility of the caller to generate an appropriate entry |
---|
194 | ; stub; *especially* when going to real mode. |
---|
195 | ; |
---|
196 | ; Inputs: |
---|
197 | ; ESI -> Pointer to list of (dst, src, len) triples(*) |
---|
198 | ; EDI -> Pointer to safe area for list + shuffler |
---|
199 | ; (must not overlap this code nor the RM stack) |
---|
200 | ; ECX -> Byte count of list area (for initial copy) |
---|
201 | ; |
---|
202 | ; If src == -1: then the memory pointed to by (dst, len) is bzeroed; |
---|
203 | ; this is handled inside the bcopy routine. |
---|
204 | ; |
---|
205 | ; If len == 0: this marks the end of the list; dst indicates |
---|
206 | ; the entry point and src the mode (0 = rm, nonzero = pm) |
---|
207 | ; |
---|
208 | ; (*) dst, src, and len are four bytes each |
---|
209 | ; |
---|
210 | ; do_raw_shuffle_and_boot is the same entry point, but with a C ABI: |
---|
211 | ; do_raw_shuffle_and_boot(safearea, descriptors, bytecount) |
---|
212 | ; |
---|
; ---------------------------------------------------------------------------
; do_raw_shuffle_and_boot / pm_shuffle / pm_shuffle_16
;
; Copies the descriptor list and this code to the safe area, runs the
; descriptor list through pm_bcopy, then jumps to the entry point named
; by the terminating (len == 0) descriptor.  Does not return.
;
; pm_shuffle register interface:
;	ESI	descriptor list (source of the initial copy)
;	EDI	safe area (the list is copied here)
;	ECX	byte count of the list
;
; do_raw_shuffle_and_boot takes the safe area in EAX and the descriptor
; list in EDX and falls through into pm_shuffle; ECX is passed through
; untouched (presumably the third register argument -- confirm against
; the C caller's calling convention).
;
; NOTE(review): nothing here executes CLD before the forward copies;
; DF is assumed to be clear on entry.
; ---------------------------------------------------------------------------
		global do_raw_shuffle_and_boot
do_raw_shuffle_and_boot:
		mov esi,edx		; ESI <- descriptors
		mov edi,eax		; EDI <- safe area
		; Fall through

pm_shuffle:
		cli			; Interrupts stay off from here on
		lea edx,[edi+ecx+15]	; First byte past the list copy...
		and edx,~15		; ...rounded up to 16 for the GDT
		mov ebx,edi		; EBX <- descriptor list (the copy)
		call pm_bcopy		; Copy the list into the safe area

		; Relocate this code to EDX.  Only this stretch uses
		; absolute addresses; EDX becomes the relocation delta.
		mov ecx,__bcopyxx_dwords
		mov edi,edx		; Absolute target address
		mov esi,__bcopyxx_start	; Absolute source address
		sub edx,esi		; EDX <- target - source
		lea eax,[edx+.relocated] ; Where to resume, inside the copy
		rep movsd
		jmp eax			; Continue in the relocated copy
.relocated:
		; Private stack: bcopyxx_stack bytes past the end of
		; the relocated area
		lea esp,[edx+bcopyxx_stack+__bcopyxx_end]
		add edx,bcopy_gdt	; EDX <- relocated GDT
		mov [edx+2],edx		; Patch the GDT's pointer to itself
		lgdt [edx]		; Switch to the relocated GDT

		; Walk the descriptor list: 12 bytes each (dst, src, len)
.next:
		mov ecx,[ebx+8]		; len
		mov esi,[ebx+4]		; src (-1 = zero-fill; mode on last)
		mov edi,[ebx]		; dst (entry point on last)
		add ebx,12
		jecxz .boot		; len == 0 ends the list
		call pm_bcopy
		jmp .next
.boot:
		lidt [edx+RM_IDT_ptr-bcopy_gdt]	; RM-like IDT
		push ecx		; ECX == 0 here: future EFLAGS image
		test esi,esi		; Mode word of the final descriptor
		jz pm_shuffle_16	; 0 -> 16-bit entry point
		popfd			; Clean the flags
		jmp edi			; Nonzero -> protected-mode entry

		; 16-bit entry point: make the 16-bit code and data
		; segments start at the entry address (EDI), then leave
		; through PM_CS16:0.  EDX still points to the GDT.
pm_shuffle_16:
		mov eax,edi
		mov [edx+PM_DS16+2],ax	; Base bits 15:0
		mov [edx+PM_CS16+2],ax
		shr eax,16
		mov [edx+PM_DS16+4],al	; Base bits 23:16
		mov [edx+PM_CS16+4],al
		mov [edx+PM_DS16+7],ah	; Base bits 31:24
		mov [edx+PM_CS16+7],ah
		; EAX <- CR0 with bit 0 cleared.  It is not written back
		; here; presumably the code at the entry point does that.
		mov eax,cr0
		and al,~1
		popfd			; Clean the flags
		; Only MOVs and the far jump below: flags stay clean
		mov dx,PM_DS16
		mov ds,edx
		mov es,edx
		mov fs,edx
		mov gs,edx
		mov ss,edx
		jmp PM_CS16:0
278 | |
---|
; ---------------------------------------------------------------------------
; Data that travels with the relocated code: the GDT, the real-mode
; style IDT pointer and the stack size constant.
; ---------------------------------------------------------------------------
		section .bcopyxx.data

		alignz 16

; desc <name>
;	Marks the start of a GDT entry.  Defines the label
;	bcopy_gdt.<name> and the constant PM_<name>, the byte offset
;	of the entry from bcopy_gdt (i.e. its selector value).
%macro desc 1
bcopy_gdt.%1:
PM_%1		equ bcopy_gdt.%1-bcopy_gdt
%endmacro

; Each 8-byte entry below is laid out as
;	dw limit 15:0, base 15:0
;	db base 23:16, access, flags + limit 19:16, base 31:24
bcopy_gdt:
		; 00h: null descriptor, reused as the LGDT operand
		dw bcopy_gdt_size-1	; GDT limit
		dd bcopy_gdt		; GDT base
		dw 0

		; TSS segment to keep Intel VT happy.  Intel VT is
		; unhappy about anything that doesn't smell like a
		; full-blown 32-bit OS.
	desc TSS			; 08h: 32-bit task state segment
		dw 104-1, DummyTSS	; 104 bytes @DummyTSS
		db 00h, 89h, 00h, 00h	; present, dpl 0

	desc CS16			; 10h: code, use16, readable
		dw 0FFFFh, 0000h	; cover 64K
		db 00h, 9Bh, 00h, 00h	; present, dpl 0
	desc DS16			; 18h: data, use16, read/write
		dw 0FFFFh, 0000h	; cover 64K
		db 00h, 93h, 00h, 00h	; present, dpl 0
	desc CS32			; 20h: code, use32, readable
		dw 0FFFFh, 0000h	; cover all 4G
		db 00h, 9Bh, 0CFh, 00h	; present, dpl 0
	desc DS32			; 28h: data, use32, read/write
		dw 0FFFFh, 0000h	; cover all 4G
		db 00h, 93h, 0CFh, 00h	; present, dpl 0

bcopy_gdt_size:	equ $-bcopy_gdt

;
; Address used for the dummy task state segment.  Nothing should ever
; access it, but in case something does, this is a chunk of memory
; that has a chance of not being used for anything real.
;
DummyTSS	equ 0x580

		align 4
RM_IDT_ptr:	dw 0FFFFh		; Length (nonsense, but matches CPU)
		dd 0			; Offset

bcopyxx_stack	equ 128			; Stack bytes wanted past the code
; ---------------------------------------------------------------------------
; __syslinux_shuffler_size: exported 32-bit constant holding the value
; of the external symbol __bcopyxx_len (defined outside this file).
; ---------------------------------------------------------------------------
		section .rodata
		extern __bcopyxx_len
		global __syslinux_shuffler_size
		align 4
__syslinux_shuffler_size: dd __bcopyxx_len

		; Code after this point is 16-bit again
		bits 16
		section .text16