/****************************************************************************/
/*-
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#if defined(LIBC_SCCS) && !defined(lint)
static char sccsid[] = "@(#)qsort.c	8.1 (Berkeley) 6/4/93";
#endif /* LIBC_SCCS and not lint */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD: src/lib/libc/stdlib/qsort.c,v 1.15 2008/01/14 09:21:34 das Exp $");

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <pthread.h>
#include <dispatch/dispatch.h>
#include <stddef.h>
#include <sys/mman.h>
#include <libkern/OSAtomic.h>
#define __APPLE_API_PRIVATE
#include <machine/cpu_capabilities.h>	/* _NumCPUs() */

#ifdef I_AM_PSORT_R
typedef int		 cmp_t(void *, const void *, const void *);
#else
typedef int		 cmp_t(const void *, const void *);
#endif
#ifdef I_AM_PSORT_B
static inline char	*med3(char *, char *, char *, cmp_t ^, void *) __attribute__((always_inline));
#else
static inline char	*med3(char *, char *, char *, cmp_t *, void *) __attribute__((always_inline));
#endif
static inline void	 swapfunc(char *, char *, int, int) __attribute__((always_inline));

#define	min(a, b)	((a) < (b) ? (a) : (b))
#define	NARGS			((PAGESIZE - offsetof(struct page, args)) / sizeof(union args))
#define	PAGESIZE		4096
#define	PARALLEL_MIN_SIZE	2000	/* determined heuristically */

struct shared;				/* forward reference */

/* Argument block describing one partition queued for sorting. */
union args {
	union args *next;
	struct {
		struct shared *shared;
		void *a;
		size_t n;
		int depth_limit;
	} /* anonymous */;
};

/* One mmap'd page; the space after the header holds argument blocks. */
struct page {
	struct page *next;
	union args args[0];
};

/* State shared by every sub-sort spawned from a single psort() call. */
struct shared {
	char *who;			/* name used in abort messages */
	union args *freelist;
	struct page *pagelist;
#ifdef I_AM_PSORT_R
	void *thunk;
#endif
#ifdef I_AM_PSORT_B
	cmp_t ^cmp;
#else
	cmp_t *cmp;
#endif
	size_t es;
	size_t turnoff;			/* partition size below which we stay serial */
	dispatch_queue_t queue;
	pthread_cond_t cond;
	pthread_mutex_t mutex;
	OSSpinLock sharedlock;
	int count;			/* outstanding sub-sorts */
};

/*
 * Pop an argument block from the free list, growing the list by one
 * mmap'd page whenever it is empty.
 */
static union args *
getargs(struct shared *shared)
{
	union args *args;

	OSSpinLockLock(&shared->sharedlock);
	if(!shared->freelist) {
		struct page *page;
		union args *prev;
		int i;
		/* mmap(2) reports failure with MAP_FAILED, not NULL */
		if((page = (struct page *)mmap(NULL, PAGESIZE, PROT_READ|PROT_WRITE,
		    MAP_ANON|MAP_PRIVATE, -1, 0)) == MAP_FAILED)
			return NULL;
		page->next = shared->pagelist;
		shared->pagelist = page;
		prev = NULL;
		for(args = page->args, i = NARGS; i > 0; args++, i--) {
			args->next = prev;
			prev = args;
		}
		shared->freelist = prev;
	}
	args = shared->freelist;
	shared->freelist = args->next;
	OSSpinLockUnlock(&shared->sharedlock);
	return args;
}

/* Return an argument block to the free list. */
static void
returnargs(struct shared *shared, union args *args)
{
	OSSpinLockLock(&shared->sharedlock);
	args->next = shared->freelist;
	shared->freelist = args;
	OSSpinLockUnlock(&shared->sharedlock);
}

/*
 * Qsort routine from Bentley & McIlroy's "Engineering a Sort Function".
 */
#define swapcode(TYPE, parmi, parmj, n) {		\
	long i = (n) / sizeof (TYPE);			\
	TYPE *pi = (TYPE *) (parmi);			\
	TYPE *pj = (TYPE *) (parmj);			\
	do {						\
		TYPE t = *pi;				\
		*pi++ = *pj;				\
		*pj++ = t;				\
	} while (--i > 0);				\
}

/* swaptype: 0 = element is one aligned long, 1 = multiple longs, 2 = bytes */
#define SWAPINIT(a, es) swaptype = ((char *)a - (char *)0) % sizeof(long) || \
	es % sizeof(long) ? 2 : es == sizeof(long)? 0 : 1;

static inline void
swapfunc(char *a, char *b, int n, int swaptype)
{
	if(swaptype <= 1)
		swapcode(long, a, b, n)
	else
		swapcode(char, a, b, n)
}

#define swap(a, b)					\
	if (swaptype == 0) {				\
		long t = *(long *)(a);			\
		*(long *)(a) = *(long *)(b);		\
		*(long *)(b) = t;			\
	} else						\
		swapfunc(a, b, es, swaptype)

#define vecswap(a, b, n)	if ((n) > 0) swapfunc(a, b, n, swaptype)

#ifdef I_AM_PSORT_R
#define	CMP(t, x, y) (cmp((t), (x), (y)))
#else
#define	CMP(t, x, y) (cmp((x), (y)))
#endif

static inline char *
med3(char *a, char *b, char *c,
#ifdef I_AM_PSORT_B
    cmp_t ^cmp,
#else
    cmp_t *cmp,
#endif
    void *thunk
#ifndef I_AM_PSORT_R
    __unused
#endif
)
{
	return CMP(thunk, a, b) < 0 ?
	       (CMP(thunk, b, c) < 0 ? b : (CMP(thunk, a, c) < 0 ? c : a ))
	      :(CMP(thunk, b, c) > 0 ? b : (CMP(thunk, a, c) < 0 ? a : c ));
}

#ifdef __LP64__
#define DEPTH(x)	(2 * (flsl((long)(x)) - 1))
#else /* !__LP64__ */
#define DEPTH(x)	(2 * (fls((int)(x)) - 1))
#endif /* __LP64__ */
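/*
 * DEPTH() is the usual introsort bound of roughly 2 * log2(n) partitioning
 * levels before _psort() gives up on quicksort and falls back to heapsort.
 * For example, DEPTH(1000000) = 2 * (flsl(1000000) - 1) = 2 * 19 = 38.
 */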
#ifdef I_AM_PSORT_R
int __heapsort_r(void *, size_t, size_t, void *,
    int (*)(void *, const void *, const void *));
#endif

static void _psort_parallel(void *x);

/*
 * Core introspective quicksort.  Partitions larger than shared->turnoff
 * are handed to the dispatch queue; smaller ones are sorted in the current
 * thread, and one side of each split is handled by iteration rather than
 * recursion to bound stack use.
 */
static void
_psort(void *a, size_t n, size_t es,
#ifdef I_AM_PSORT_R
    void *thunk,
#else
#define thunk NULL
#endif
#ifdef I_AM_PSORT_B
    cmp_t ^cmp,
#else
    cmp_t *cmp,
#endif
    int depth_limit, struct shared *shared)
{
	char *pa, *pb, *pc, *pd, *pl, *pm, *pn;
	size_t d, r;
	int cmp_result;
	int swaptype, swap_cnt;

loop:
	if (depth_limit-- <= 0) {
#ifdef I_AM_PSORT_B
		heapsort_b(a, n, es, cmp);
#elif defined(I_AM_PSORT_R)
		__heapsort_r(a, n, es, thunk, cmp);
#else
		heapsort(a, n, es, cmp);
#endif
		return;
	}
	SWAPINIT(a, es);
	swap_cnt = 0;
	if (n < 7) {
		/* Insertion sort for tiny partitions */
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es)
				swap(pl, pl - es);
		return;
	}
	/* Choose a pivot: middle element, median of 3, or ninther */
	pm = (char *)a + (n / 2) * es;
	if (n > 7) {
		pl = a;
		pn = (char *)a + (n - 1) * es;
		if (n > 40) {
			d = (n / 8) * es;
			pl = med3(pl, pl + d, pl + 2 * d, cmp, thunk);
			pm = med3(pm - d, pm, pm + d, cmp, thunk);
			pn = med3(pn - 2 * d, pn - d, pn, cmp, thunk);
		}
		pm = med3(pl, pm, pn, cmp, thunk);
	}
	swap(a, pm);
	pa = pb = (char *)a + es;

	pc = pd = (char *)a + (n - 1) * es;
	for (;;) {
		while (pb <= pc && (cmp_result = CMP(thunk, pb, a)) <= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pa, pb);
				pa += es;
			}
			pb += es;
		}
		while (pb <= pc && (cmp_result = CMP(thunk, pc, a)) >= 0) {
			if (cmp_result == 0) {
				swap_cnt = 1;
				swap(pc, pd);
				pd -= es;
			}
			pc -= es;
		}
		if (pb > pc)
			break;
		swap(pb, pc);
		swap_cnt = 1;
		pb += es;
		pc -= es;
	}

	pn = (char *)a + n * es;
	r = min(pa - (char *)a, pb - pa);
	vecswap(a, pb - r, r);
	r = min(pd - pc, pn - pd - es);
	vecswap(pb, pn - r, r);

	if (swap_cnt == 0) {  /* Switch to insertion sort */
		r = 1 + n / 4; /* n >= 7, so r >= 2 */
		for (pm = (char *)a + es; pm < (char *)a + n * es; pm += es)
			for (pl = pm;
			     pl > (char *)a && CMP(thunk, pl - es, pl) > 0;
			     pl -= es) {
				swap(pl, pl - es);
				if (++swap_cnt > r)
					goto nevermind;
			}
		return;
	}

nevermind:
	if ((r = pb - pa) > es) {
		r /= es;
		if (shared && r > shared->turnoff) {
			union args *args = getargs(shared);

			if (args == NULL)
				LIBC_ABORT("%s: getargs: %s", shared->who, strerror(errno));
			args->shared = shared;
			args->a = a;
			args->n = r;
			args->depth_limit = depth_limit;
			OSAtomicIncrement32(&shared->count);
			dispatch_async_f(shared->queue, args, _psort_parallel);
		} else {
#ifdef I_AM_PSORT_R
			_psort(a, r, es, thunk, cmp, depth_limit, NULL);
#else
			_psort(a, r, es, cmp, depth_limit, NULL);
#endif
		}
	}
	if ((r = pd - pc) > es) {
		/* Iterate rather than recurse to save stack space */
		a = pn - r;
		n = r / es;
		goto loop;
	}
/*		psort(pn - r, r / es, es, cmp);*/
}

/*
 * Dispatch worker: sort one queued partition, recycle its argument block,
 * and wake the initiating thread when the last outstanding sub-sort is done.
 */
static void
_psort_parallel(void *x)
{
	union args *args = (union args *)x;
	struct shared *shared = args->shared;

	_psort(args->a, args->n, shared->es,
#ifdef I_AM_PSORT_R
	    shared->thunk,
#endif
	    shared->cmp, args->depth_limit, shared);
	returnargs(shared, args);
	if(OSAtomicDecrement32(&shared->count) <= 0) {
		pthread_mutex_lock(&shared->mutex);
		pthread_cond_signal(&shared->cond);
		pthread_mutex_unlock(&shared->mutex);
	}
}

/* fast, approximate integer square root */
static size_t
isqrt(size_t x)
{
	size_t s = 1L << (flsl(x) / 2);
	return (s + x / s) / 2;
}
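/*
 * For scale: at the minimum parallel size of 2000 elements, isqrt() seeds
 * s = 1 << (flsl(2000) / 2) = 32 and returns (32 + 2000 / 32) / 2 = 47,
 * close to the true sqrt(2000) of about 44.7.  Partitions no larger than
 * this turnoff are sorted in the calling thread rather than dispatched.
 */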
void
#ifdef I_AM_PSORT_R
psort_r(void *a, size_t n, size_t es, void *thunk, cmp_t *cmp)
#elif defined(I_AM_PSORT_B)
psort_b(void *a, size_t n, size_t es, cmp_t ^cmp)
#else
psort(void *a, size_t n, size_t es, cmp_t *cmp)
#endif
{
	if (n >= PARALLEL_MIN_SIZE && _NumCPUs() > 1) {
		struct shared shared;
		union args *args;

		bzero(&shared, sizeof(shared));
		shared.sharedlock = OS_SPINLOCK_INIT;

		if ((args = getargs(&shared)) != NULL) {
			struct page *p, *pp;
#ifdef I_AM_PSORT_R
			shared.who = "psort_r";
			shared.thunk = thunk;
#elif defined(I_AM_PSORT_B)
			shared.who = "psort_b";
#else
			shared.who = "psort";
#endif
			shared.cmp = cmp;
			shared.es = es;
			shared.queue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
			shared.cond = (pthread_cond_t)PTHREAD_COND_INITIALIZER;
			shared.mutex = (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
			args->a = a;
			args->n = n;
			args->depth_limit = DEPTH(n);
			args->shared = &shared;
			/*
			 * The turnoff value is the partition size below
			 * which we stop sorting in parallel and just sort
			 * in the current thread.  The value sqrt(n) was
			 * determined heuristically.  There is a smaller
			 * dependence on the slowness of the comparison
			 * function, and there might be a dependence on the
			 * number of processors, but the form of that
			 * dependence has not been determined.  Because the
			 * sensitivity to the turnoff value is relatively
			 * low, we use a fast, approximate integer square
			 * root routine that is good enough for this purpose.
			 */
			shared.turnoff = isqrt(n);
			OSAtomicIncrement32(&shared.count);
			_psort_parallel(args);

			/* wait for queue to drain */
			pthread_mutex_lock(&shared.mutex);
			while(shared.count > 0)
				pthread_cond_wait(&shared.cond, &shared.mutex);

			pthread_mutex_unlock(&shared.mutex);
			pthread_mutex_destroy(&shared.mutex);
			pthread_cond_destroy(&shared.cond);
			for(p = shared.pagelist; p; p = pp) {
				pp = p->next;
				munmap(p, PAGESIZE);
			}
			return;
		}
	}
	/* Just call qsort */
#ifdef I_AM_PSORT_R
	qsort_r(a, n, es, thunk, cmp);
#elif defined(I_AM_PSORT_B)
	qsort_b(a, n, es, cmp);
#else
	qsort(a, n, es, cmp);
#endif
}
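/*
 * Usage sketch: a minimal caller of the plain psort() entry point, assuming
 * this file is compiled without I_AM_PSORT_R or I_AM_PSORT_B.  The
 * PSORT_EXAMPLE_MAIN guard is hypothetical (never defined by the library)
 * and only keeps this example out of normal builds.
 */
#if defined(PSORT_EXAMPLE_MAIN) && !defined(I_AM_PSORT_R) && !defined(I_AM_PSORT_B)
#include <stdio.h>

/* qsort(3)-style comparator: ascending ints. */
static int
cmp_int(const void *l, const void *r)
{
	int a = *(const int *)l;
	int b = *(const int *)r;
	return (a > b) - (a < b);
}

int
main(void)
{
	int v[] = { 9, 3, 7, 1, 8, 2, 6, 5, 4, 0 };
	size_t i, n = sizeof(v) / sizeof(v[0]);

	/* Same calling convention as qsort(3); the sort only goes parallel
	 * when n >= PARALLEL_MIN_SIZE and more than one CPU is available. */
	psort(v, n, sizeof(v[0]), cmp_int);

	for (i = 0; i < n; i++)
		printf("%d ", v[i]);
	printf("\n");
	return 0;
}
#endif /* PSORT_EXAMPLE_MAIN */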