1 | #ifndef ETHERBOOT_BITS_STRING_H |
---|
2 | #define ETHERBOOT_BITS_STRING_H |
---|
3 | /* |
---|
4 | * Taken from Linux /usr/include/asm/string.h |
---|
5 | * All except memcpy, memmove, memset, strncmp and strlen removed. |
---|
6 | * |
---|
7 | * Non-standard memswap() function added because it saves quite a bit |
---|
8 | * of code (mbrown@fensystems.co.uk). |
---|
9 | */ |
---|
10 | |
---|
11 | /* |
---|
12 | * This string-include defines all string functions as inline |
---|
13 | * functions. Use gcc. It also assumes ds=es=data space, this should be |
---|
14 | * normal. Most of the string-functions are rather heavily hand-optimized, |
---|
15 | * see especially strtok,strstr,str[c]spn. They should work, but are not |
---|
16 | * very easy to understand. Everything is done entirely within the register |
---|
17 | * set, making the functions fast and clean. String instructions have been |
---|
18 | * used through-out, making for "slightly" unclear code :-) |
---|
19 | * |
---|
20 | * NO Copyright (C) 1991, 1992 Linus Torvalds, |
---|
21 | * consider these trivial functions to be PD. |
---|
22 | */ |
---|
23 | |
---|
24 | FILE_LICENCE ( PUBLIC_DOMAIN ); |
---|
25 | |
---|
#define __HAVE_ARCH_MEMCPY

/*
 * General-purpose copy routine, used whenever the length is not a
 * compile-time constant or is too long to be worth open-coding.
 * Only the prototype is given here; no definition is visible in this
 * header.
 */
extern void * __memcpy ( void *dest, const void *src, size_t len );

#if 0
/*
 * Disabled inline variant of __memcpy(): a single "rep movsb" with
 * ecx/esi/edi loaded from len/src/dest.  The three outputs exist only
 * to tell the compiler that those registers are modified.
 */
static inline __attribute__ (( always_inline )) void *
__memcpy ( void *dest, const void *src, size_t len ) {
	int count_out, src_out, dest_out;

	__asm__ __volatile__ ( "rep ; movsb"
			       : "=&c" ( count_out ),
				 "=&S" ( src_out ),
				 "=&D" ( dest_out )
			       : "0" ( len ), "1" ( src ), "2" ( dest )
			       : "memory" );
	return dest;
}
#endif
---|
41 | |
---|
/*
 * Open-coded memcpy() intended for lengths known at compile time.
 *
 * The function is always inlined, so with a constant len the switch
 * and the length comparisons below can be folded away, leaving only
 * the moves that are really needed.
 *
 * Returns dest.
 */
static inline __attribute__ (( always_inline )) void *
__constant_memcpy ( void *dest, const void *src, size_t len ) {
	/* View of up to eight bytes as dwords, words or bytes */
	union chunk {
		uint32_t u32[2];
		uint16_t u16[4];
		uint8_t u8[8];
	} __attribute__ (( __may_alias__ ));
	union chunk *to = dest;
	const union chunk *from = src;
	/* Running pointers for the string instructions further down */
	const void *esi = src;
	void *edi = dest;

	/*
	 * Short copies are done with plain register moves; these are
	 * always better than a string operation, since any two
	 * registers may be used (the data register can double as the
	 * source register) instead of being tied to esi and edi, and
	 * the compiler has far more freedom to optimise with nearby
	 * code.
	 *
	 * The "N bytes" notes do not match the copy lengths; they
	 * presumably give the size of the generated code.
	 */
	switch ( len ) {
	case 0 : /* 0 bytes */
		return dest;
	case 1 : /* 4 bytes */
		to->u8[0] = from->u8[0];
		return dest;
	case 2 : /* 6 bytes */
		to->u16[0] = from->u16[0];
		return dest;
	case 3 : /* 12 bytes */
		to->u16[0] = from->u16[0];
		to->u8[2] = from->u8[2];
		return dest;
	case 4 : /* 4 bytes */
		to->u32[0] = from->u32[0];
		return dest;
	case 5 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u8[4] = from->u8[4];
		return dest;
	case 6 : /* 12 bytes */
		to->u32[0] = from->u32[0];
		to->u16[2] = from->u16[2];
		return dest;
	case 8 : /* 10 bytes */
		to->u32[0] = from->u32[0];
		to->u32[1] = from->u32[1];
		return dest;
	}

	/*
	 * Anything else uses string instructions.  Even though esi
	 * and edi have to be loaded, a run of individual "movs"
	 * instructions can be smaller than loading ecx and issuing
	 * "rep movsb":
	 *
	 *   - "load ecx, rep movsb" is 7 bytes, plus an average of 1
	 *     byte for saving/restoring ecx 50% of the time;
	 *   - "movsl" and "movsb" are 1 byte each, "movsw" is 2 bytes
	 *     (in 16-bit mode "movsl" is 2 bytes and "movsw" 1 byte,
	 *     but "movsl" moves twice the data, so it balances out).
	 *
	 * The break-even point is around 26 bytes:
	 *
	 * len		   16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
	 * #bytes (ecx)	    8  8  8  8  8  8  8  8  8  8  8  8  8  8  8  8
	 * #bytes (no ecx)  4  5  6  7  5  6  7  8  6  7  8  9  7  8  9 10
	 */
	if ( len >= 26 )
		return __memcpy ( dest, src, len );

	/* One "movsl" per complete dword (at most six below the cutoff) */
	if ( len >= 24 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( len >= 20 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( len >= 16 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( len >= 12 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( len >= 8 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( len >= 4 )
		__asm__ __volatile__ ( "movsl"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );

	/* Trailing word, then trailing byte */
	if ( ( len % 4 ) >= 2 )
		__asm__ __volatile__ ( "movsw"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );
	if ( ( len % 2 ) >= 1 )
		__asm__ __volatile__ ( "movsb"
				       : "=&D" ( edi ), "=&S" ( esi )
				       : "0" ( edi ), "1" ( esi )
				       : "memory" );

	return dest;
}
---|
152 | |
---|
/*
 * memcpy() front end: a length that the compiler can prove constant is
 * routed to the open-coded __constant_memcpy(); every other length
 * goes to the generic __memcpy().
 */
#define memcpy( dest, src, len )					\
	( __builtin_constant_p ( (len) )				\
	  ? __constant_memcpy ( (dest), (src), (len) )			\
	  : __memcpy ( (dest), (src), (len) ) )
---|
157 | |
---|
#define __HAVE_ARCH_MEMMOVE
/*
 * Copy n bytes from src to dest, where the two regions may overlap.
 *
 * When dest lies below src the bytes are copied in ascending order;
 * otherwise the copy starts at the last byte of each region and runs
 * downwards, so no source byte is overwritten before it is read.
 *
 * Returns dest.
 */
static inline void * memmove(void * dest,const void * src, size_t n)
{
	/* Dummy outputs: mark ecx, esi and edi as modified by the asm */
	int cnt_out, src_out, dst_out;

	if ( ! ( dest < src ) ) {
		/* Descending copy; the direction flag is cleared again
		 * before leaving the asm.
		 */
		__asm__ __volatile__ (
			"std\n\t"
			"rep\n\tmovsb\n\t"
			"cld"
			: "=&c" ( cnt_out ), "=&S" ( src_out ),
			  "=&D" ( dst_out )
			: "0" ( n ),
			  "1" ( (const char *) src + n - 1 ),
			  "2" ( (char *) dest + n - 1 )
			: "memory" );
	} else {
		/* Ascending copy */
		__asm__ __volatile__ (
			"cld\n\t"
			"rep\n\tmovsb"
			: "=&c" ( cnt_out ), "=&S" ( src_out ),
			  "=&D" ( dst_out )
			: "0" ( n ), "1" ( src ), "2" ( dest )
			: "memory" );
	}
	return dest;
}
---|
183 | |
---|
#define __HAVE_ARCH_MEMSET
/*
 * Fill count bytes starting at s with the low byte of c, using
 * "rep stosb" with the direction flag cleared.
 *
 * Returns s.
 */
static inline void * memset(void *s, int c,size_t count)
{
	/* Dummy outputs: mark ecx and edi as modified by the asm */
	int cnt_out, dst_out;

	__asm__ __volatile__ (
		"cld\n\t"
		"rep\n\tstosb"
		: "=&c" ( cnt_out ), "=&D" ( dst_out )
		: "a" ( c ), "1" ( s ), "0" ( count )
		: "memory" );
	return s;
}
---|
197 | |
---|
#define __HAVE_ARCH_MEMSWAP
/*
 * Exchange the contents of two memory regions, one byte at a time.
 *
 * dest and src each point to n bytes; after the call every byte of
 * dest holds what was in src and vice versa.  A length of zero is a
 * no-op.
 *
 * Returns dest.
 */
static inline void * memswap(void *dest, void *src, size_t n)
{
	/* Dummy outputs: mark ecx, esi, edi and eax as modified */
	int d0, d1, d2, d3;

	/* The asm below is a do-while loop: the body runs once before
	 * "loop" decrements ecx and tests it.  Entering with ecx == 0
	 * would therefore swap one byte, wrap ecx to 0xffffffff and
	 * keep going, so an empty request must be filtered out here.
	 */
	if ( n == 0 )
		return dest;

	__asm__ __volatile__(
		"\n1:\t"
		"movb (%%edi),%%al\n\t"		/* al = *dest */
		"xchgb (%%esi),%%al\n\t"	/* swap al with *src */
		"incl %%esi\n\t"
		"stosb\n\t"			/* *dest++ = al */
		"loop 1b"
		: "=&c" (d0), "=&S" (d1), "=&D" (d2), "=&a" (d3)
		: "0" (n), "1" (src), "2" (dest)
		: "memory" );
	return dest;
}
---|
214 | |
---|
#define __HAVE_ARCH_STRNCMP
/*
 * Compare at most count characters of the strings cs and ct.
 *
 * Comparison stops at the first differing byte, at a NUL that is
 * present in both strings, or once count bytes have been examined.
 *
 * Returns 0 if no difference was found, -1 if the first differing
 * byte of cs is lower (as an unsigned value) than that of ct, and 1
 * otherwise.
 */
static inline int strncmp(const char * cs,const char * ct,size_t count)
{
	int res;
	/* Dummy outputs: mark esi, edi and ecx as modified by the asm */
	int d0, d1, d2;

	__asm__ __volatile__(
		"1:\tdecl %3\n\t"		/* out of characters? */
		"js 2f\n\t"
		"lodsb\n\t"			/* al = *cs++ */
		"scasb\n\t"			/* compare al with *ct++ */
		"jne 3f\n\t"
		"testb %%al,%%al\n\t"		/* equal: end of string? */
		"jne 1b\n"
		"2:\txorl %%eax,%%eax\n\t"	/* equal: result 0 */
		"jmp 4f\n"
		"3:\tsbbl %%eax,%%eax\n\t"	/* -1 if borrow, else 0 */
		"orb $1,%%al\n"			/* ... turned into -1 / 1 */
		"4:"
		:"=a" (res), "=&S" (d0), "=&D" (d1), "=&c" (d2)
		:"1" (cs),"2" (ct),"3" (count)
		/* The asm reads both strings through esi/edi; without
		 * this clobber the compiler may assume it touches no
		 * memory and move or discard earlier stores to them.
		 */
		:"memory");
	return res;
}
---|
237 | |
---|
#define __HAVE_ARCH_STRLEN
/*
 * Return the number of bytes in the string s, not counting the
 * terminating NUL.
 *
 * "repne scasb" scans for al (zero) starting at edi, decrementing ecx
 * from 0xffffffff for every byte examined, including the NUL itself.
 * Inverting the remaining count gives the number of bytes examined;
 * subtracting one removes the terminator.
 */
static inline size_t strlen(const char * s)
{
	/* Dummy output: marks edi as modified by the asm */
	int d0;
	/* Unsigned so the result is not sign-extended when widened */
	unsigned int res;

	__asm__ __volatile__(
		"repne\n\t"
		"scasb\n\t"
		"notl %0\n\t"
		"decl %0"
		:"=c" (res), "=&D" (d0)
		:"1" (s),"a" (0), "0" (0xffffffff)
		/* The asm reads the string through edi; without this
		 * clobber the compiler may assume it touches no memory
		 * and move or discard earlier stores to the buffer.
		 */
		:"memory");
	return res;
}
---|
251 | |
---|
252 | #endif /* ETHERBOOT_BITS_STRING_H */ |
---|