Mercurial > sdl-ios-xcode
comparison src/stdlib/SDL_qsort.c @ 1330:450721ad5436
It's now possible to build SDL without any C runtime at all on Windows,
using Visual C++ 2005
author | Sam Lantinga <slouken@libsdl.org> |
---|---|
date | Mon, 06 Feb 2006 08:28:51 +0000 |
parents | |
children | 1cbaeee565b1 |
comparison
equal
deleted
inserted
replaced
1329:bc67bbf87818 | 1330:450721ad5436 |
---|---|
1 /* qsort.c | |
2 * (c) 1998 Gareth McCaughan | |
3 * | |
4 * This is a drop-in replacement for the C library's |qsort()| routine. | |
5 * | |
6 * Features: | |
7 * - Median-of-three pivoting (and more) | |
8 * - Truncation and final polishing by a single insertion sort | |
9 * - Early truncation when no swaps needed in pivoting step | |
10 * - Explicit recursion, guaranteed not to overflow | |
11 * - A few little wrinkles stolen from the GNU |qsort()|. | |
12 * - separate code for non-aligned / aligned / word-size objects | |
13 * | |
14 * This code may be reproduced freely provided | |
15 * - this file is retained unaltered apart from minor | |
16 * changes for portability and efficiency | |
17 * - no changes are made to this comment | |
18 * - any changes that *are* made are clearly flagged | |
19 * - the _ID string below is altered by inserting, after | |
20 * the date, the string " altered" followed at your option | |
21 * by other material. (Exceptions: you may change the name | |
22 * of the exported routine without changing the ID string. | |
23 * You may change the values of the macros TRUNC_* and | |
24 * PIVOT_THRESHOLD without changing the ID string, provided | |
25 * they remain constants with TRUNC_nonaligned, TRUNC_aligned | |
26 * and TRUNC_words/WORD_BYTES between 8 and 24, and | |
27 * PIVOT_THRESHOLD between 32 and 200.) | |
28 * | |
29 * You may use it in anything you like; you may make money | |
30 * out of it; you may distribute it in object form or as | |
31 * part of an executable without including source code; | |
32 * you don't have to credit me. (But it would be nice if | |
33 * you did.) | |
34 * | |
35 * If you find problems with this code, or find ways of | |
36 * making it significantly faster, please let me know! | |
37 * My e-mail address, valid as of early 1998 and certainly | |
38 * OK for at least the next 18 months, is | |
39 * gjm11@dpmms.cam.ac.uk | |
40 * Thanks! | |
41 * | |
42 * Gareth McCaughan Peterhouse Cambridge 1998 | |
43 */ | |
44 | |
45 /* | |
46 #include <assert.h> | |
47 #include <stdlib.h> | |
48 #include <string.h> | |
49 */ | |
50 #define assert(X) | |
51 #include "SDL_stdlib.h" | |
52 #include "SDL_string.h" | |
53 | |
54 #ifndef HAVE_QSORT | |
55 | |
56 static char _ID[]="<qsort.c gjm 1.12 1998-03-19>"; | |
57 | |
58 /* How many bytes are there per word? (Must be a power of 2, | |
59 * and must in fact equal sizeof(int).) | |
60 */ | |
61 #define WORD_BYTES sizeof(int) | |
62 | |
63 /* How big does our stack need to be? Answer: one entry per | |
64 * bit in a |size_t|. | |
65 */ | |
66 #define STACK_SIZE (8*sizeof(size_t)) | |
67 | |
68 /* Different situations have slightly different requirements, | |
69 * and we make life epsilon easier by using different truncation | |
70 * points for the three different cases. | |
71 * So far, I have tuned TRUNC_words and guessed that the same | |
72 * value might work well for the other two cases. Of course | |
73 * what works well on my machine might work badly on yours. | |
74 */ | |
75 #define TRUNC_nonaligned 12 | |
76 #define TRUNC_aligned 12 | |
77 #define TRUNC_words 12*WORD_BYTES /* nb different meaning */ | |
78 | |
79 /* We use a simple pivoting algorithm for shortish sub-arrays | |
80 * and a more complicated one for larger ones. The threshold | |
81 * is PIVOT_THRESHOLD. | |
82 */ | |
83 #define PIVOT_THRESHOLD 40 | |
84 | |
85 typedef struct { char * first; char * last; } stack_entry; | |
86 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;} | |
87 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;} | |
88 #define doLeft {first=ffirst;llast=last;continue;} | |
89 #define doRight {ffirst=first;last=llast;continue;} | |
90 #define pop {if (--stacktop<0) break;\ | |
91 first=ffirst=stack[stacktop].first;\ | |
92 last=llast=stack[stacktop].last;\ | |
93 continue;} | |
94 | |
95 /* Some comments on the implementation. | |
96 * 1. When we finish partitioning the array into "low" | |
97 * and "high", we forget entirely about short subarrays, | |
98 * because they'll be done later by insertion sort. | |
99 * Doing lots of little insertion sorts might be a win | |
100 * on large datasets for locality-of-reference reasons, | |
101 * but it makes the code much nastier and increases | |
102 * bookkeeping overhead. | |
103 * 2. We always save the shorter and get to work on the | |
104 * longer. This guarantees that every time we push | |
105 * an item onto the stack its size is <= 1/2 of that | |
106 * of its parent; so the stack can't need more than | |
107 * log_2(max-array-size) entries. | |
108 * 3. We choose a pivot by looking at the first, last | |
109 * and middle elements. We arrange them into order | |
110 * because it's easy to do that in conjunction with | |
111 * choosing the pivot, and it makes things a little | |
112 * easier in the partitioning step. Anyway, the pivot | |
113 * is the middle of these three. It's still possible | |
114 * to construct datasets where the algorithm takes | |
115 * time of order n^2, but it simply never happens in | |
116 * practice. | |
117 * 3' Newsflash: On further investigation I find that | |
118 * it's easy to construct datasets where median-of-3 | |
119 * simply isn't good enough. So on large-ish subarrays | |
120 * we do a more sophisticated pivoting: we take three | |
121 * sets of 3 elements, find their medians, and then | |
122 * take the median of those. | |
123 * 4. We copy the pivot element to a separate place | |
124 * because that way we can always do our comparisons | |
125 * directly against a pointer to that separate place, | |
126 * and don't have to wonder "did we move the pivot | |
127 * element?". This makes the inner loop better. | |
128 * 5. It's possible to make the pivoting even more | |
129 * reliable by looking at more candidates when n | |
130 * is larger. (Taking this to its logical conclusion | |
131 * results in a variant of quicksort that doesn't | |
132 * have that n^2 worst case.) However, the overhead | |
133 * from the extra bookkeeping means that it's just | |
134 * not worth while. | |
135 * 6. This is pretty clean and portable code. Here are | |
136 * all the potential portability pitfalls and problems | |
137 * I know of: | |
138 * - In one place (the insertion sort) I construct | |
139 * a pointer that points just past the end of the | |
140 * supplied array, and assume that (a) it won't | |
141 * compare equal to any pointer within the array, | |
142 * and (b) it will compare equal to a pointer | |
143 * obtained by stepping off the end of the array. | |
144 * These might fail on some segmented architectures. | |
145 * - I assume that there are 8 bits in a |char| when | |
146 * computing the size of stack needed. This would | |
147 * fail on machines with 9-bit or 16-bit bytes. | |
148 * - I assume that if |((int)base&(sizeof(int)-1))==0| | |
149 * and |(size&(sizeof(int)-1))==0| then it's safe to | |
150 * get at array elements via |int*|s, and that if | |
151 * actually |size==sizeof(int)| as well then it's | |
152 * safe to treat the elements as |int|s. This might | |
153 * fail on systems that convert pointers to integers | |
154 * in non-standard ways. | |
155 * - I assume that |8*sizeof(size_t)<=INT_MAX|. This | |
156 * would be false on a machine with 8-bit |char|s, | |
157 * 16-bit |int|s and 4096-bit |size_t|s. :-) | |
158 */ | |
159 | |
160 /* The recursion logic is the same in each case: */ | |
161 #define Recurse(Trunc) \ | |
162 { size_t l=last-ffirst,r=llast-first; \ | |
163 if (l<Trunc) { \ | |
164 if (r>=Trunc) doRight \ | |
165 else pop \ | |
166 } \ | |
167 else if (l<=r) { pushLeft; doRight } \ | |
168 else if (r>=Trunc) { pushRight; doLeft }\ | |
169 else doLeft \ | |
170 } | |
171 | |
172 /* and so is the pivoting logic: */ | |
173 #define Pivot(swapper,sz) \ | |
174 if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\ | |
175 else { \ | |
176 if (compare(first,mid)<0) { \ | |
177 if (compare(mid,last)>0) { \ | |
178 swapper(mid,last); \ | |
179 if (compare(first,mid)>0) swapper(first,mid);\ | |
180 } \ | |
181 } \ | |
182 else { \ | |
183 if (compare(mid,last)>0) swapper(first,last)\ | |
184 else { \ | |
185 swapper(first,mid); \ | |
186 if (compare(mid,last)>0) swapper(mid,last);\ | |
187 } \ | |
188 } \ | |
189 first+=sz; last-=sz; \ | |
190 } | |
191 | |
192 #ifdef DEBUG_QSORT | |
193 #include <stdio.h> | |
194 #endif | |
195 | |
196 /* and so is the partitioning logic: */ | |
197 #define Partition(swapper,sz) { \ | |
198 int swapped=0; \ | |
199 do { \ | |
200 while (compare(first,pivot)<0) first+=sz; \ | |
201 while (compare(pivot,last)<0) last-=sz; \ | |
202 if (first<last) { \ | |
203 swapper(first,last); swapped=1; \ | |
204 first+=sz; last-=sz; } \ | |
205 else if (first==last) { first+=sz; last-=sz; break; }\ | |
206 } while (first<=last); \ | |
207 if (!swapped) pop \ | |
208 } | |
209 | |
210 /* and so is the pre-insertion-sort operation of putting | |
211 * the smallest element into place as a sentinel. | |
212 * Doing this makes the inner loop nicer. I got this | |
213 * idea from the GNU implementation of qsort(). | |
214 */ | |
215 #define PreInsertion(swapper,limit,sz) \ | |
216 first=base; \ | |
217 last=first + (nmemb>limit ? limit : nmemb-1)*sz;\ | |
218 while (last!=base) { \ | |
219 if (compare(first,last)>0) first=last; \ | |
220 last-=sz; } \ | |
221 if (first!=base) swapper(first,(char*)base); | |
222 | |
223 /* and so is the insertion sort, in the first two cases: */ | |
224 #define Insertion(swapper) \ | |
225 last=((char*)base)+nmemb*size; \ | |
226 for (first=((char*)base)+size;first!=last;first+=size) { \ | |
227 char *test; \ | |
228 /* Find the right place for |first|. \ | |
229 * My apologies for var reuse. */ \ | |
230 for (test=first-size;compare(test,first)>0;test-=size) ; \ | |
231 test+=size; \ | |
232 if (test!=first) { \ | |
233 /* Shift everything in [test,first) \ | |
234 * up by one, and place |first| \ | |
235 * where |test| is. */ \ | |
236 memcpy(pivot,first,size); \ | |
237 memmove(test+size,test,first-test); \ | |
238 memcpy(test,pivot,size); \ | |
239 } \ | |
240 } | |
241 | |
242 #define SWAP_nonaligned(a,b) { \ | |
243 register char *aa=(a),*bb=(b); \ | |
244 register size_t sz=size; \ | |
245 do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); } | |
246 | |
247 #define SWAP_aligned(a,b) { \ | |
248 register int *aa=(int*)(a),*bb=(int*)(b); \ | |
249 register size_t sz=size; \ | |
250 do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); } | |
251 | |
252 #define SWAP_words(a,b) { \ | |
253 register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; } | |
254 | |
255 /* ---------------------------------------------------------------------- */ | |
256 | |
257 static char * pivot_big(char *first, char *mid, char *last, size_t size, | |
258 int compare(const void *, const void *)) { | |
259 int d=(((last-first)/size)>>3)*size; | |
260 char *m1,*m2,*m3; | |
261 { char *a=first, *b=first+d, *c=first+2*d; | |
262 #ifdef DEBUG_QSORT | |
263 fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | |
264 #endif | |
265 m1 = compare(a,b)<0 ? | |
266 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | |
267 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | |
268 } | |
269 { char *a=mid-d, *b=mid, *c=mid+d; | |
270 #ifdef DEBUG_QSORT | |
271 fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | |
272 #endif | |
273 m2 = compare(a,b)<0 ? | |
274 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | |
275 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | |
276 } | |
277 { char *a=last-2*d, *b=last-d, *c=last; | |
278 #ifdef DEBUG_QSORT | |
279 fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c); | |
280 #endif | |
281 m3 = compare(a,b)<0 ? | |
282 (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a)) | |
283 : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b)); | |
284 } | |
285 #ifdef DEBUG_QSORT | |
286 fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3); | |
287 #endif | |
288 return compare(m1,m2)<0 ? | |
289 (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1)) | |
290 : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2)); | |
291 } | |
292 | |
293 /* ---------------------------------------------------------------------- */ | |
294 | |
295 static void qsort_nonaligned(void *base, size_t nmemb, size_t size, | |
296 int (*compare)(const void *, const void *)) { | |
297 | |
298 stack_entry stack[STACK_SIZE]; | |
299 int stacktop=0; | |
300 char *first,*last; | |
301 char *pivot=malloc(size); | |
302 size_t trunc=TRUNC_nonaligned*size; | |
303 assert(pivot!=0); | |
304 | |
305 first=(char*)base; last=first+(nmemb-1)*size; | |
306 | |
307 if ((size_t)(last-first)>trunc) { | |
308 char *ffirst=first, *llast=last; | |
309 while (1) { | |
310 /* Select pivot */ | |
311 { char * mid=first+size*((last-first)/size >> 1); | |
312 Pivot(SWAP_nonaligned,size); | |
313 memcpy(pivot,mid,size); | |
314 } | |
315 /* Partition. */ | |
316 Partition(SWAP_nonaligned,size); | |
317 /* Prepare to recurse/iterate. */ | |
318 Recurse(trunc) | |
319 } | |
320 } | |
321 PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size); | |
322 Insertion(SWAP_nonaligned); | |
323 free(pivot); | |
324 } | |
325 | |
326 static void qsort_aligned(void *base, size_t nmemb, size_t size, | |
327 int (*compare)(const void *, const void *)) { | |
328 | |
329 stack_entry stack[STACK_SIZE]; | |
330 int stacktop=0; | |
331 char *first,*last; | |
332 char *pivot=malloc(size); | |
333 size_t trunc=TRUNC_aligned*size; | |
334 assert(pivot!=0); | |
335 | |
336 first=(char*)base; last=first+(nmemb-1)*size; | |
337 | |
338 if ((size_t)(last-first)>trunc) { | |
339 char *ffirst=first,*llast=last; | |
340 while (1) { | |
341 /* Select pivot */ | |
342 { char * mid=first+size*((last-first)/size >> 1); | |
343 Pivot(SWAP_aligned,size); | |
344 memcpy(pivot,mid,size); | |
345 } | |
346 /* Partition. */ | |
347 Partition(SWAP_aligned,size); | |
348 /* Prepare to recurse/iterate. */ | |
349 Recurse(trunc) | |
350 } | |
351 } | |
352 PreInsertion(SWAP_aligned,TRUNC_aligned,size); | |
353 Insertion(SWAP_aligned); | |
354 free(pivot); | |
355 } | |
356 | |
357 static void qsort_words(void *base, size_t nmemb, | |
358 int (*compare)(const void *, const void *)) { | |
359 | |
360 stack_entry stack[STACK_SIZE]; | |
361 int stacktop=0; | |
362 char *first,*last; | |
363 char *pivot=malloc(WORD_BYTES); | |
364 assert(pivot!=0); | |
365 | |
366 first=(char*)base; last=first+(nmemb-1)*WORD_BYTES; | |
367 | |
368 if (last-first>TRUNC_words) { | |
369 char *ffirst=first, *llast=last; | |
370 while (1) { | |
371 #ifdef DEBUG_QSORT | |
372 fprintf(stderr,"Doing %d:%d: ", | |
373 (first-(char*)base)/WORD_BYTES, | |
374 (last-(char*)base)/WORD_BYTES); | |
375 #endif | |
376 /* Select pivot */ | |
377 { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES)); | |
378 Pivot(SWAP_words,WORD_BYTES); | |
379 *(int*)pivot=*(int*)mid; | |
380 } | |
381 #ifdef DEBUG_QSORT | |
382 fprintf(stderr,"pivot=%d\n",*(int*)pivot); | |
383 #endif | |
384 /* Partition. */ | |
385 Partition(SWAP_words,WORD_BYTES); | |
386 /* Prepare to recurse/iterate. */ | |
387 Recurse(TRUNC_words) | |
388 } | |
389 } | |
390 PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES); | |
391 /* Now do insertion sort. */ | |
392 last=((char*)base)+nmemb*WORD_BYTES; | |
393 for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) { | |
394 /* Find the right place for |first|. My apologies for var reuse */ | |
395 int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first; | |
396 *(int*)pivot=*(int*)first; | |
397 for (;compare(pl,pivot)>0;pr=pl,--pl) { | |
398 *pr=*pl; } | |
399 if (pr!=(int*)first) *pr=*(int*)pivot; | |
400 } | |
401 free(pivot); | |
402 } | |
403 | |
404 /* ---------------------------------------------------------------------- */ | |
405 | |
406 void SDL_qsort(void *base, size_t nmemb, size_t size, | |
407 int (*compare)(const void *, const void *)) { | |
408 | |
409 if (nmemb<=1) return; | |
410 if (((int)base|size)&(WORD_BYTES-1)) | |
411 qsort_nonaligned(base,nmemb,size,compare); | |
412 else if (size!=WORD_BYTES) | |
413 qsort_aligned(base,nmemb,size,compare); | |
414 else | |
415 qsort_words(base,nmemb,compare); | |
416 } | |
417 | |
418 #endif /* !HAVE_QSORT */ |