Initial revision

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@69872 138bc75d-0d04-0410-961f-82ee72b054a4
author: jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4> 2003-07-28 03:46:07 +0000
committer: jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4> 2003-07-28 03:46:07 +0000
commit: 715e3bd3f343537e75f28ab83f73d8c20ed7929f (patch)
tree: c698ff9b5618f81b32dbda3a94016046ef2e1a48
parent: 2f61b8cae7e804b377aede07f9d06291244ff64d (diff)
download: gcc-715e3bd3f343537e75f28ab83f73d8c20ed7929f.tar.gz
22 files changed, 5739 insertions, 0 deletions
diff --git a/boehm-gc/aix_irix_threads.c b/boehm-gc/aix_irix_threads.c
new file mode 100644
index 00000000000..d8ac3454af9
--- /dev/null
+++ b/boehm-gc/aix_irix_threads.c
@@ -0,0 +1,693 @@
+/* 
+ * Copyright (c) 1991-1995 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
+ * Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+/*
+ * Support code for Irix (>=6.2) Pthreads and for AIX pthreads.
+ * This relies on properties
+ * not guaranteed by the Pthread standard.  It may or may not be portable
+ * to other implementations.
+ *
+ * Note that there is a lot of code duplication between this file and
+ * (pthread_support.c, pthread_stop_world.c).  They should be merged.
+ * Pthread_support.c should be directly usable.
+ *
+ * Please avoid adding new ports here; use the generic pthread support
+ * as a base instead.
+ */
+
+# if defined(GC_IRIX_THREADS) || defined(GC_AIX_THREADS)
+
+# include "private/gc_priv.h"
+# include <pthread.h>
+# include <assert.h>
+# include <semaphore.h>
+# include <time.h>
+# include <errno.h>
+# include <unistd.h>
+# include <sys/mman.h>
+# include <sys/time.h>
+
+#undef pthread_create
+#undef pthread_sigmask
+#undef pthread_join
+
+#if defined(GC_IRIX_THREADS) && !defined(MUTEX_RECURSIVE_NP)
+#define MUTEX_RECURSIVE_NP PTHREAD_MUTEX_RECURSIVE
+#endif
+
+void GC_thr_init();
+
+#if 0
+/* Debugging aid: print, on one line prefixed with "Blocked: ", the	*/
+/* number of every signal from 1 to MAXSIG that is a member of the	*/
+/* calling thread's current signal mask.				*/
+void GC_print_sig_mask()
+{
+    sigset_t current_mask;
+    int signo = 1;
+
+    /* With a null "set" argument the mask is only read back.	*/
+    if (0 != pthread_sigmask(SIG_BLOCK, NULL, &current_mask)) {
+        ABORT("pthread_sigmask");
+    }
+    GC_printf0("Blocked: ");
+    while (signo <= MAXSIG) {
+        if (sigismember(&current_mask, signo)) {
+            GC_printf1("%ld ", (long) signo);
+        }
+        signo++;
+    }
+    GC_printf0("\n");
+}
+#endif
+
+/* We use the allocation lock to protect thread-related data structures. */
+
+/* The set of all known threads.  We intercept thread creation and 	*/
+/* joins.  We never actually create detached threads.  We allocate all 	*/
+/* new thread stacks ourselves.  These allow us to maintain this	*/
+/* data structure.							*/
+/* Protected by GC_thr_lock.						*/
+/* Some of this should be declared volatile, but that's inconsistent	*/
+/* with some library routine declarations.  		 		*/
+typedef struct GC_Thread_Rep {
+    struct GC_Thread_Rep * next;  /* More recently allocated threads	*/
+				  /* with a given pthread id come 	*/
+				  /* first.  (All but the first are	*/
+				  /* guaranteed to be dead, but we may  */
+				  /* not yet have registered the join.) */
+    pthread_t id;		/* Hashed modulo THREAD_TABLE_SZ to pick the GC_threads bucket. */
+    word stop;			/* One of the three states defined below. */
+#	define NOT_STOPPED 0	/* Set by the signal-based GC_start_world. */
+#	define PLEASE_STOP 1	/* Set by GC_stop_world before it sends SIG_SUSPEND. */
+#	define STOPPED 2	/* Set by GC_suspend_handler, or directly for dead threads. */
+    word flags;			/* Bitwise OR of the flag bits defined below. */
+#	define FINISHED 1   	/* Thread has exited.	*/
+#	define DETACHED 2	/* Thread is intended to be detached.	*/
+    ptr_t stack_cold;		/* cold end of the stack		*/
+				/* (0 stands for GC_stackbottom).	*/
+    ptr_t stack_hot;  		/* Valid only when stopped. */
+				/* But must be within stack region at	*/
+				/* all times.				*/
+    void * status;		/* Used only to avoid premature 	*/
+				/* reclamation of any data it might 	*/
+				/* reference.				*/
+} * GC_thread;
+
+GC_thread GC_lookup_thread(pthread_t id);
+
+/*
+ * The only way to suspend threads given the pthread interface is to send
+ * signals.  Unfortunately, this means we have to reserve
+ * a signal, and intercept client calls to change the signal mask.
+ */
+#if 0 /* DOB: 6.1 */
+# if defined(GC_AIX_THREADS)
+#   define SIG_SUSPEND SIGUSR1
+# else
+#   define SIG_SUSPEND (SIGRTMIN + 6)
+# endif
+#endif
+
+pthread_mutex_t GC_suspend_lock = PTHREAD_MUTEX_INITIALIZER;
+				/* Number of threads stopped so far	*/
+pthread_cond_t GC_suspend_ack_cv = PTHREAD_COND_INITIALIZER;
+pthread_cond_t GC_continue_cv = PTHREAD_COND_INITIALIZER;
+
+/*
+ * Handler for SIG_SUSPEND; it runs in the thread that is being stopped.
+ * If GC_stop_world has requested a stop (stop == PLEASE_STOP), the
+ * thread records the address of a local as its hot stack end, marks
+ * itself STOPPED, acknowledges through GC_suspend_ack_cv and then
+ * blocks on GC_continue_cv until the signal-based GC_start_world has
+ * reset its state.  Any other delivery (including one to a thread that
+ * is not in GC_threads) is treated as misdirected and ignored.
+ * Aborts if invoked for a signal other than SIG_SUSPEND.
+ */
+void GC_suspend_handler(int sig)
+{
+    int dummy;	/* Only its address is used, as the current stack position. */
+    GC_thread me;
+
+    if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
+    me = GC_lookup_thread(pthread_self());
+    /* The lookup here is safe, since I'm doing this on behalf  */
+    /* of a thread which holds the allocation lock in order	*/
+    /* to stop the world.  Thus concurrent modification of the	*/
+    /* data structure is impossible.				*/
+    if (0 == me || PLEASE_STOP != me -> stop) {
+	/* Misdirected signal.  GC_suspend_lock is not held at	*/
+	/* this point, so it must not be unlocked here.		*/
+	return;
+    }
+    pthread_mutex_lock(&GC_suspend_lock);
+    me -> stack_hot = (ptr_t)(&dummy);
+    me -> stop = STOPPED;
+    pthread_cond_signal(&GC_suspend_ack_cv);
+    /* Condition waits may return spuriously, so stay parked until	*/
+    /* the stop field has actually been moved out of STOPPED.		*/
+    while (STOPPED == me -> stop) {
+	pthread_cond_wait(&GC_continue_cv, &GC_suspend_lock);
+    }
+    pthread_mutex_unlock(&GC_suspend_lock);
+    /* GC_printf1("Continuing 0x%x\n", pthread_self()); */
+}
+
+
+GC_bool GC_thr_initialized = FALSE;
+
+
+# define THREAD_TABLE_SZ 128	/* Must be power of 2	*/
+volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+
+/* Hand the whole GC_threads hash table, from its first byte to one	*/
+/* past its last, to GC_push_all.					*/
+void GC_push_thread_structures GC_PROTO((void))
+{
+    ptr_t table_lo = (ptr_t)(GC_threads);
+    ptr_t table_hi = (ptr_t)(GC_threads) + sizeof(GC_threads);
+
+    GC_push_all(table_lo, table_hi);
+}
+
+/* Create a record for thread "id" and link it at the head of its	*/
+/* hash chain in GC_threads.  We assume it wasn't already there.	*/
+/* The first record ever handed out is a static object; later ones	*/
+/* come from GC_generic_malloc_inner.  Returns the new record, or 0	*/
+/* if that allocation fails.  Only id and next are assigned here.	*/
+/* Caller holds allocation lock.					*/
+GC_thread GC_new_thread(pthread_t id)
+{
+    static struct GC_Thread_Rep first_thread;
+    static GC_bool first_thread_used = FALSE;
+    int bucket = ((word)id) % THREAD_TABLE_SZ;
+    GC_thread rec;
+
+    GC_ASSERT(I_HOLD_LOCK());
+    if (first_thread_used) {
+        rec = (struct GC_Thread_Rep *)
+              GC_generic_malloc_inner(sizeof(struct GC_Thread_Rep), NORMAL);
+        if (0 == rec) return(0);
+    } else {
+        /* Don't acquire allocation lock, since we may already hold it. */
+        first_thread_used = TRUE;
+        rec = &first_thread;
+    }
+    rec -> id = id;
+    /* Newest record goes first in the chain.	*/
+    rec -> next = GC_threads[bucket];
+    GC_threads[bucket] = rec;
+    /* rec -> flags and rec -> stop are deliberately not assigned.	*/
+    return(rec);
+}
+
+/* Unlink the record gc_id from the GC_threads chain selected by id.	*/
+/* The record is assumed to be on that chain; if it is not, the walk	*/
+/* runs off the end of the chain and traps (intentionally).		*/
+/* The record itself is passed in, and not just the pthread id,	*/
+/* because a joined thread whose join we have not yet been told	*/
+/* about may share its pthread id with a newer thread, so the id	*/
+/* alone does not identify a unique record.				*/
+/* Caller holds allocation lock.					*/
+void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
+{
+    int bucket = ((word)id) % THREAD_TABLE_SZ;
+    register GC_thread cur;
+    register GC_thread before = 0;
+
+    GC_ASSERT(I_HOLD_LOCK());
+    for (cur = GC_threads[bucket]; cur != gc_id; cur = cur -> next) {
+        before = cur;
+    }
+    if (before != 0) {
+        before -> next = cur -> next;
+    } else {
+        /* gc_id was the head of its chain.	*/
+        GC_threads[bucket] = cur -> next;
+    }
+}
+
+/* Find the record for pthread id "id" in GC_threads.			*/
+/* Returns the first match on the hash chain, which is the most	*/
+/* recently added one, or 0 if there is none.				*/
+/* Caller holds allocation lock or otherwise inhibits updates		*/
+/* (e.g. it is the stop-the-world signal handler).			*/
+GC_thread GC_lookup_thread(pthread_t id)
+{
+    register GC_thread cur;
+    int bucket = ((word)id) % THREAD_TABLE_SZ;
+
+#if defined(GC_AIX_THREADS)
+    GC_ASSERT(I_HOLD_LOCK()); /* no stop-the-world handler needed on AIX */
+#endif
+    for (cur = GC_threads[bucket]; cur != 0; cur = cur -> next) {
+        if (pthread_equal(cur -> id, id)) break;
+    }
+    return(cur);
+}
+
+#if defined(GC_AIX_THREADS)
+/* AIX variant: call pthread_suspend_np on every thread in GC_threads	*/
+/* other than the caller.  Its return value is not examined.		*/
+/* Caller holds allocation lock.					*/
+void GC_stop_world()
+{
+    pthread_t self = pthread_self();
+    int bucket;
+    GC_thread t;
+
+    GC_ASSERT(I_HOLD_LOCK());
+    for (bucket = 0; bucket < THREAD_TABLE_SZ; bucket++) {
+      for (t = GC_threads[bucket]; t != 0; t = t -> next) {
+        if (t -> id == self) continue;
+        pthread_suspend_np(t -> id);
+      }
+    }
+    /* GC_printf1("World stopped 0x%x\n", pthread_self()); */
+}
+
+/* AIX variant: call pthread_continue_np on every thread in GC_threads	*/
+/* other than the caller.  Its return value is not examined.		*/
+/* Caller holds allocation lock.					*/
+void GC_start_world()
+{
+    pthread_t self = pthread_self();
+    unsigned bucket;
+    GC_thread t;
+
+    /* GC_printf0("World starting\n"); */
+    GC_ASSERT(I_HOLD_LOCK());
+    for (bucket = 0; bucket < THREAD_TABLE_SZ; bucket++) {
+      for (t = GC_threads[bucket]; t != 0; t = t -> next) {
+        if (t -> id == self) continue;
+        pthread_continue_np(t -> id);
+      }
+    }
+}
+
+#else /* GC_AIX_THREADS */
+
+/* Stop all other registered threads (signal-based variant, compiled
+ * when GC_AIX_THREADS is not defined).
+ * First pass: every other thread that is not FINISHED is marked
+ * PLEASE_STOP and sent SIG_SUSPEND; threads that are FINISHED, or for
+ * which pthread_kill reports ESRCH, are marked STOPPED directly.
+ * Second pass: with GC_suspend_lock held, wait on GC_suspend_ack_cv
+ * until each other thread is STOPPED, re-sending the signal whenever a
+ * 50 msec timed wait expires.
+ * Caller holds allocation lock.	*/
+void GC_stop_world()
+{
+    pthread_t my_thread = pthread_self();
+    register int i;
+    register GC_thread p;
+    register int result;
+    struct timespec timeout;
+    
+    GC_ASSERT(I_HOLD_LOCK());
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        if (p -> id != my_thread) {
+            if (p -> flags & FINISHED) {
+		p -> stop = STOPPED;
+		continue;
+	    }
+	    p -> stop = PLEASE_STOP;
+            result = pthread_kill(p -> id, SIG_SUSPEND);
+	    /* GC_printf1("Sent signal to 0x%x\n", p -> id); */
+	    switch(result) {
+                case ESRCH:
+                    /* Not really there anymore.  Possible? */
+                    p -> stop = STOPPED;
+                    break;
+                case 0:
+                    break;
+                default:
+                    ABORT("pthread_kill failed");
+            }
+        }
+      }
+    }
+    pthread_mutex_lock(&GC_suspend_lock);
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        while (p -> id != my_thread && p -> stop != STOPPED) {
+	    clock_gettime(CLOCK_REALTIME, &timeout);
+            timeout.tv_nsec += 50000000; /* 50 msecs */
+            if (timeout.tv_nsec >= 1000000000) {
+                timeout.tv_nsec -= 1000000000;
+                ++timeout.tv_sec;
+            }
+            result = pthread_cond_timedwait(&GC_suspend_ack_cv,
+					    &GC_suspend_lock,
+                                            &timeout);
+            if (result == ETIMEDOUT) {
+                /* Signal was lost or misdirected.  Try again.      */
+                /* Duplicate signals should be benign.              */
+                /* The result of this re-send is not examined.      */
+                result = pthread_kill(p -> id, SIG_SUSPEND);
+	    }
+	}
+      }
+    }
+    pthread_mutex_unlock(&GC_suspend_lock);
+    /* GC_printf1("World stopped 0x%x\n", pthread_self()); */
+}
+
+/* Signal-based variant: reset every thread's stop field to		*/
+/* NOT_STOPPED, then broadcast GC_continue_cv so that threads waiting	*/
+/* in GC_suspend_handler can resume.					*/
+/* Caller holds allocation lock.					*/
+void GC_start_world()
+{
+    unsigned bucket = 0;
+    GC_thread t;
+
+    /* GC_printf0("World starting\n"); */
+    GC_ASSERT(I_HOLD_LOCK());
+    while (bucket < THREAD_TABLE_SZ) {
+      t = GC_threads[bucket++];
+      while (t != 0) {
+        t -> stop = NOT_STOPPED;
+        t = t -> next;
+      }
+    }
+    /* All other threads are at pthread_cond_wait in signal handler.	*/
+    /* Otherwise we couldn't have acquired the lock.			*/
+    pthread_mutex_lock(&GC_suspend_lock);
+    pthread_mutex_unlock(&GC_suspend_lock);
+    pthread_cond_broadcast(&GC_continue_cv);
+}
+
+#endif /* GC_AIX_THREADS */
+
+
+/* Push the stack of every thread in GC_threads that is not FINISHED.
+ * The cold end is the record's stack_cold (0 stands for GC_stackbottom).
+ * The hot end is GC_approx_sp() for the calling thread; for any other
+ * thread it is the stack_hot stored in its record or, on AIX, a value
+ * derived from pthread_getthrds_np, in which case the returned context
+ * and register buffer are pushed as well.
+ * We hold allocation lock.  Should do exactly the right thing if the	*/
+/* world is stopped.  Should not fail if it isn't.			*/
+void GC_push_all_stacks()
+{
+    register int i;
+    register GC_thread p;
+    register ptr_t hot, cold;
+    pthread_t me = pthread_self();
+    
+    /* GC_init() should have been called before GC_push_all_stacks is
+     * invoked, and GC_init calls GC_thr_init(), which sets
+     * GC_thr_initialized. */
+    GC_ASSERT(GC_thr_initialized);
+
+    /* GC_printf1("Pushing stacks from thread 0x%x\n", me); */
+    GC_ASSERT(I_HOLD_LOCK());
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        if (p -> flags & FINISHED) continue; /* skip threads marked as exited */
+	cold = p->stack_cold;
+	if (!cold) cold=GC_stackbottom; /* 0 indicates 'original stack' */
+        if (pthread_equal(p -> id, me)) {
+	    hot = GC_approx_sp();
+	} else {
+#        ifdef GC_AIX_THREADS
+          /* AIX doesn't use signals to suspend, so we need to get an */
+	  /* accurate hot stack pointer.			      */
+	  /* See http://publib16.boulder.ibm.com/pseries/en_US/libs/basetrf1/pthread_getthrds_np.htm */
+          pthread_t id = p -> id;
+          struct __pthrdsinfo pinfo;
+          int regbuf[64];
+          int val = sizeof(regbuf); /* passed by address; later used as the byte count to push */
+          int retval = pthread_getthrds_np(&id, PTHRDSINFO_QUERY_ALL, &pinfo,
+			  		   sizeof(pinfo), regbuf, &val);
+          if (retval != 0) {
+	    printf("ERROR: pthread_getthrds_np() failed in GC\n");
+	    abort();
+	  }
+	  /* according to the AIX ABI, 
+	     "the lowest possible valid stack address is 288 bytes (144 + 144)
+	     less than the current value of the stack pointer.  Functions may
+	     use this stack space as volatile storage which is not preserved
+	     across function calls."
+	     ftp://ftp.penguinppc64.org/pub/people/amodra/PPC-elf64abi.txt.gz
+	  */
+          hot = (ptr_t)(unsigned long)pinfo.__pi_ustk-288;
+	  cold = (ptr_t)pinfo.__pi_stackend; /* more precise */
+          /* push the registers too, because they won't be on stack */
+          GC_push_all_eager((ptr_t)&pinfo.__pi_context,
+			    (ptr_t)((&pinfo.__pi_context)+1));
+          GC_push_all_eager((ptr_t)regbuf, ((ptr_t)regbuf)+val);
+#	 else
+              hot = p -> stack_hot;
+#	 endif
+	}
+#	ifdef STACK_GROWS_UP
+          GC_push_all_stack(cold, hot);
+#	else
+ /* printf("thread 0x%x: hot=0x%08x cold=0x%08x\n", p -> id, hot, cold); */
+          GC_push_all_stack(hot, cold);
+#	endif
+      }
+    }
+}
+
+
+/* One-time setup of thread support.  Calls after the first return	*/
+/* immediately.  Unless GC_AIX_THREADS is defined, GC_suspend_handler	*/
+/* is installed for SIG_SUSPEND (aborting if some other handler is	*/
+/* already installed).  The calling thread is then registered as the	*/
+/* initial thread, marked DETACHED.					*/
+/* We hold the allocation lock (but see the disabled assertion below).	*/
+void GC_thr_init()
+{
+    GC_thread initial;
+    struct sigaction sa;
+
+    if (GC_thr_initialized) return;
+#if 0
+    /* unfortunately, GC_init_inner calls us without the lock, so
+     * this assertion is not always true. */
+    /* Why doesn't GC_init_inner hold the lock? - HB		*/
+    GC_ASSERT(I_HOLD_LOCK());
+#endif
+    GC_thr_initialized = TRUE;
+#ifndef GC_AIX_THREADS
+    /* Query the current disposition first.	*/
+    (void) sigaction(SIG_SUSPEND, 0, &sa);
+    if (SIG_DFL != sa.sa_handler) {
+        ABORT("Previously installed SIG_SUSPEND handler");
+    }
+    /* Install handler.	*/
+    sa.sa_handler = GC_suspend_handler;
+    sa.sa_flags = SA_RESTART;
+    (void) sigemptyset(&sa.sa_mask);
+    if (sigaction(SIG_SUSPEND, &sa, 0) != 0) {
+        ABORT("Failed to install SIG_SUSPEND handler");
+    }
+#endif
+    /* Add the initial thread, so we can stop it.	*/
+    initial = GC_new_thread(pthread_self());
+    initial -> flags = DETACHED;
+    initial -> stack_hot = (ptr_t)(&initial);
+    /* use '0' to indicate GC_stackbottom, since GC_init() has not
+     * completed by the time we are called (from GC_init_inner()) */
+    initial -> stack_cold = 0; /* the original stack. */
+}
+
+/* Replacement for pthread_sigmask.  On AIX the call is forwarded	*/
+/* unchanged.  Elsewhere, when the caller supplies a set with		*/
+/* SIG_BLOCK or SIG_SETMASK, SIG_SUSPEND is removed from a private	*/
+/* copy of that set before forwarding; the caller's set is never	*/
+/* modified.  Returns whatever pthread_sigmask returns.		*/
+int GC_pthread_sigmask(int how, const sigset_t *set, sigset_t *oset)
+{
+#ifdef GC_AIX_THREADS
+    return(pthread_sigmask(how, set, oset));
+#else
+    sigset_t allowed;
+
+    if (NULL == set || (SIG_BLOCK != how && SIG_SETMASK != how)) {
+        return(pthread_sigmask(how, set, oset));
+    }
+    allowed = *set;
+    sigdelset(&allowed, SIG_SUSPEND);
+    return(pthread_sigmask(how, &allowed, oset));
+#endif
+}
+
+struct start_info {	/* Passed by GC_pthread_create to GC_start_routine. */
+    void *(*start_routine)(void *);	/* Client's thread function.	*/
+    void *arg;				/* Argument for start_routine.	*/
+    word flags;				/* Copied into the new thread's GC_thread flags. */
+    pthread_mutex_t registeredlock;	/* Held around accesses to registereddone. */
+    pthread_cond_t registered;     	/* Signalled by the child after it has registered. */
+    int volatile registereddone;	/* Set to 1 by the child; the parent waits for it. */
+};
+
+/* Cleanup handler installed by GC_start_routine.  Marks the calling	*/
+/* thread's record FINISHED and, if the thread is DETACHED, removes	*/
+/* the record from GC_threads.  "arg" is not used.  Acquires and	*/
+/* releases the allocation lock.					*/
+void GC_thread_exit_proc(void *arg)
+{
+    pthread_t self = pthread_self();
+    GC_thread rec;
+
+    LOCK();
+    rec = GC_lookup_thread(self);
+    rec -> flags |= FINISHED;
+    /* reclaim DETACHED thread right away; otherwise wait until join() */
+    if (0 != (rec -> flags & DETACHED)) {
+        GC_delete_gc_thread(self, rec);
+    }
+    UNLOCK();
+}
+
+/* Replacement for pthread_join.  Looks up the target's record, joins	*/
+/* the thread, and, only if the join succeeded, removes that record	*/
+/* from GC_threads.  A failed join leaves the table untouched, since	*/
+/* the target may still be running.  An EINTR result is reported as	*/
+/* success (see below).  Returns the pthread_join result.		*/
+int GC_pthread_join(pthread_t thread, void **retval)
+{
+    int result;
+    GC_thread thread_gc_id;
+    
+    LOCK();
+    thread_gc_id = GC_lookup_thread(thread);
+    /* This is guaranteed to be the intended one, since the thread id	*/
+    /* cant have been recycled by pthreads.				*/
+    UNLOCK();
+    GC_ASSERT(!(thread_gc_id->flags & DETACHED));
+    result = pthread_join(thread, retval);
+    /* Some versions of the Irix pthreads library can erroneously 	*/
+    /* return EINTR when the call succeeds.				*/
+    if (EINTR == result) result = 0;
+    if (0 == result) {
+	/* The exit handler must have run before the join completed.	*/
+	GC_ASSERT(thread_gc_id->flags & FINISHED);
+	LOCK();
+	/* Here the pthread thread id may have been recycled, which is	*/
+	/* why the record pointer is passed as well.			*/
+	GC_delete_gc_thread(thread, thread_gc_id);
+	UNLOCK();
+    }
+    return result;
+}
+/*
+ * Start routine that GC_pthread_create hands to pthread_create.
+ * "arg" is the parent's struct start_info.  The new thread registers
+ * itself in GC_threads, copies the client's routine and argument out of
+ * the start_info, tells the parent that registration is done, and then
+ * runs the client's routine with GC_thread_exit_proc installed as a
+ * cleanup handler.  Returns the client routine's result.
+ */
+void * GC_start_routine(void * arg)
+{
+    int dummy;	/* its address is recorded as this thread's cold stack end */
+    struct start_info * si = arg;
+    void * result;
+    GC_thread me;
+    pthread_t my_pthread;
+    void *(*start)(void *);
+    void *start_arg;
+
+    my_pthread = pthread_self();
+    /* If a GC occurs before the thread is registered, that GC will	*/
+    /* ignore this thread.  That's fine, since it will block trying to  */
+    /* acquire the allocation lock, and won't yet hold interesting 	*/
+    /* pointers.							*/
+    LOCK();
+    /* We register the thread here instead of in the parent, so that	*/
+    /* we don't need to hold the allocation lock during pthread_create. */
+    /* Holding the allocation lock there would make REDIRECT_MALLOC	*/
+    /* impossible.  It probably still doesn't work, but we're a little  */
+    /* closer ...							*/
+    /* This unfortunately means that we have to be careful the parent	*/
+    /* doesn't try to do a pthread_join before we're registered.	*/
+    me = GC_new_thread(my_pthread); /* NOTE(review): a 0 return is not checked here */
+    me -> flags = si -> flags;
+    me -> stack_cold = (ptr_t) &dummy; /* this now the 'start of stack' */
+    me -> stack_hot = me->stack_cold;/* this field should always be sensible */
+    UNLOCK();
+    start = si -> start_routine;
+    start_arg = si -> arg;
+
+    pthread_mutex_lock(&(si->registeredlock));
+    si->registereddone = 1;
+    pthread_cond_signal(&(si->registered));
+    pthread_mutex_unlock(&(si->registeredlock));
+    /* si went away as soon as we did this unlock */
+
+    pthread_cleanup_push(GC_thread_exit_proc, 0);
+    result = (*start)(start_arg);
+    me -> status = result;
+    pthread_cleanup_pop(1); /* nonzero argument: run GC_thread_exit_proc now */
+	/* This involves acquiring the lock, ensuring that we can't exit */
+	/* while a collection that thinks we're alive is trying to stop  */
+	/* us.								 */
+    return(result);
+}
+
+/* Replacement for pthread_create.  The new thread actually starts in	*/
+/* GC_start_routine, which registers it in GC_threads and then calls	*/
+/* start_routine(arg).  On success this function does not return	*/
+/* until the child has registered itself.  "attr" may be NULL, in	*/
+/* which case the thread is treated as joinable.			*/
+/* Returns ENOMEM if the hand-off block cannot be allocated, and	*/
+/* otherwise the result of pthread_create.				*/
+int
+GC_pthread_create(pthread_t *new_thread,
+		  const pthread_attr_t *attr,
+                  void *(*start_routine)(void *), void *arg)
+{
+    int result;
+    int detachstate;
+    word my_flags = 0;
+    struct start_info * si;
+    	/* This is otherwise saved only in an area mmapped by the thread */
+    	/* library, which isn't visible to the collector.		 */
+
+    LOCK();
+    /* GC_INTERNAL_MALLOC implicitly calls GC_init() if required */
+    si = (struct start_info *)GC_INTERNAL_MALLOC(sizeof(struct start_info),
+						 NORMAL);
+    GC_ASSERT(GC_thr_initialized); /* initialized by GC_init() */
+    UNLOCK();
+    if (0 == si) return(ENOMEM);
+    pthread_mutex_init(&(si->registeredlock), NULL);
+    pthread_cond_init(&(si->registered),NULL);
+    /* Clear the flag before the child exists, so that its value never	*/
+    /* depends on who gets to the lock first.				*/
+    si -> registereddone = 0;
+    pthread_mutex_lock(&(si->registeredlock));
+    si -> start_routine = start_routine;
+    si -> arg = arg;
+
+    /* A null attr requests default attributes and must not be passed	*/
+    /* to pthread_attr_getdetachstate.					*/
+    if (NULL == attr) {
+	detachstate = PTHREAD_CREATE_JOINABLE;
+    } else {
+	pthread_attr_getdetachstate(attr, &detachstate);
+    }
+    if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED;
+    si -> flags = my_flags;
+    result = pthread_create(new_thread, attr, GC_start_routine, si); 
+
+    /* Wait until child has been added to the thread table.		*/
+    /* This also ensures that we hold onto si until the child is done	*/
+    /* with it.  Thus it doesn't matter whether it is otherwise		*/
+    /* visible to the collector.					*/
+
+    if (0 == result) {
+      while (!si->registereddone) 
+        pthread_cond_wait(&(si->registered), &(si->registeredlock));
+    }
+    pthread_mutex_unlock(&(si->registeredlock));
+
+    pthread_cond_destroy(&(si->registered));
+    pthread_mutex_destroy(&(si->registeredlock));
+    LOCK();
+    GC_INTERNAL_FREE(si);
+    UNLOCK();
+
+    return(result);
+}
+
+/* For now we use the pthreads locking primitives on HP/UX */
+
+VOLATILE GC_bool GC_collecting = 0; /* A hint that we're in the collector and       */
+                        /* holding the allocation lock for an           */
+                        /* extended period.                             */
+
+/* Reasonably fast spin locks.  Basically the same implementation */
+/* as STL alloc.h.						  */
+
+#define SLEEP_THRESHOLD 3
+
+volatile unsigned int GC_allocate_lock = 0;
+#define GC_TRY_LOCK() !GC_test_and_set(&GC_allocate_lock)
+#define GC_LOCK_TAKEN GC_allocate_lock
+/*
+ * Acquire GC_allocate_lock.  One immediate attempt is made.  If it
+ * fails, spin for up to spin_max iterations, leaving the spin loop at
+ * once when GC_collecting is set.  spin_max starts at low_spin_max, is
+ * raised to high_spin_max after a spin succeeds, and is reset to
+ * low_spin_max after a spin runs to completion without success.
+ * After spinning, loop forever: retry, calling sched_yield() for the
+ * first SLEEP_THRESHOLD failures and then nanosleep() for 1 << i
+ * nanoseconds, with i capped at 26.
+ */
+void GC_lock()
+{
+#   define low_spin_max 30  /* spin cycles if we suspect uniprocessor */
+#   define high_spin_max 1000 /* spin cycles for multiprocessor */
+    static unsigned spin_max = low_spin_max;
+    unsigned my_spin_max;
+    static unsigned last_spins = 0;
+    unsigned my_last_spins;
+    volatile unsigned junk;
+#   define PAUSE junk *= junk; junk *= junk; junk *= junk; junk *= junk
+    int i;
+
+    if (GC_TRY_LOCK()) {
+        return;
+    }
+    junk = 0;
+    my_spin_max = spin_max;
+    my_last_spins = last_spins;
+    for (i = 0; i < my_spin_max; i++) {
+        if (GC_collecting) goto yield;
+        if (i < my_last_spins/2 || GC_LOCK_TAKEN) {
+            PAUSE; 
+            continue;
+        }
+        if (GC_TRY_LOCK()) {
+	    /*
+             * got it!
+             * Spinning worked.  Thus we're probably not being scheduled
+             * against the other process with which we were contending.
+             * Thus it makes sense to spin longer the next time.
+	     */
+            last_spins = i;
+            spin_max = high_spin_max;
+            return;
+        }
+    }
+    /* We are probably being scheduled against the other process.  Sleep. */
+    spin_max = low_spin_max;
+yield:
+    for (i = 0;; ++i) {
+        if (GC_TRY_LOCK()) {
+            return;
+        }
+        if (i < SLEEP_THRESHOLD) {
+            sched_yield();
+	} else {
+	    struct timespec ts;
+	
+	    if (i > 26) i = 26;
+			/* Don't wait for more than about 60msecs, even	*/
+			/* under extreme contention.			*/
+	    ts.tv_sec = 0;
+	    ts.tv_nsec = 1 << i;
+	    nanosleep(&ts, 0);
+	}
+    }
+}
+
+# else  /* !GC_IRIX_THREADS && !GC_AIX_THREADS */
+
+#ifndef LINT
+  int GC_no_Irix_threads;
+#endif
+
+# endif /* IRIX_THREADS */
+
diff --git a/boehm-gc/alpha_mach_dep.S b/boehm-gc/alpha_mach_dep.S
new file mode 100644
index 00000000000..53547307a5d
--- /dev/null
+++ b/boehm-gc/alpha_mach_dep.S
@@ -0,0 +1,87 @@
+ # $Id: alpha_mach_dep.s,v 1.2 1993/01/18 22:54:51 dosser Exp $
+	.arch ev6
+
+        .text
+        .align  4
+        .globl  GC_push_regs
+        .ent    GC_push_regs 2
+GC_push_regs:
+	ldgp    $gp, 0($27)
+	lda     $sp, -16($sp)		# make a 16-byte stack frame
+	stq     $26, 0($sp)		# save $26; the jsr calls below overwrite it
+        .mask   0x04000000, 0
+        .frame  $sp, 16, $26, 0
+
+ # $0		integer result
+ # $1-$8	temp regs - not preserved cross calls
+ # $9-$15	call saved regs
+ # $16-$21	argument regs - not preserved cross calls
+ # $22-$28	temp regs - not preserved cross calls
+ # $29		global pointer - not preserved cross calls
+ # $30		stack pointer
+
+# define call_push(x)			\
+	mov   x, $16;			\
+	jsr   $26, GC_push_one;		\
+	ldgp  $gp, 0($26)
+	
+        call_push($9)
+        call_push($10)
+        call_push($11)
+        call_push($12)
+        call_push($13)
+        call_push($14)
+        call_push($15)
+
+ # $f0-$f1	floating point results
+ # $f2-$f9	call saved regs
+ # $f10-$f30	temp regs - not preserved cross calls
+
+	# Use the most efficient transfer method for this hardware.
+	# Bit 1 detects the FIX extension, which includes ftoit.
+	amask	2, $0
+	bne	$0, $use_stack		# nonzero result: take the path that avoids ftoit
+
+#undef call_push
+#define call_push(x)			\
+	ftoit	x, $16;			\
+	jsr	$26, GC_push_one;	\
+	ldgp	$gp, 0($26)
+
+	call_push($f2)
+	call_push($f3)
+	call_push($f4)
+	call_push($f5)
+	call_push($f6)
+	call_push($f7)
+	call_push($f8)
+	call_push($f9)
+
+	ldq     $26, 0($sp)		# reload the saved $26
+	lda     $sp, 16($sp)		# release the frame
+	ret     $31, ($26), 1
+
+	.align	4
+$use_stack:
+
+#undef call_push
+#define call_push(x)			\
+	stt	x, 8($sp);		\
+	ldq	$16, 8($sp);		\
+	jsr	$26, GC_push_one;	\
+	ldgp	$gp, 0($26)
+
+	call_push($f2)
+	call_push($f3)
+	call_push($f4)
+	call_push($f5)
+	call_push($f6)
+	call_push($f7)
+	call_push($f8)
+	call_push($f9)
+
+	ldq     $26, 0($sp)		# reload the saved $26
+	lda     $sp, 16($sp)		# release the frame
+	ret     $31, ($26), 1
+
+	.end    GC_push_regs
diff --git a/boehm-gc/darwin_stop_world.c b/boehm-gc/darwin_stop_world.c
new file mode 100644
index 00000000000..bc2247fa4e7
--- /dev/null
+++ b/boehm-gc/darwin_stop_world.c
@@ -0,0 +1,209 @@
+#include "private/pthread_support.h"
+
+# if defined(GC_DARWIN_THREADS)
+
+#define DEBUG_THREADS 0
+
+/* From "Inside Mac OS X - Mach-O Runtime Architecture" published by Apple
+   Page 49:
+   "The space beneath the stack pointer, where a new stack frame would normally
+   be allocated, is called the red zone. This area as shown in Figure 3-2 may
+   be used for any purpose as long as a new stack frame does not need to be
+   added to the stack."
+   
+   Page 50: "If a leaf procedure's red zone usage would exceed 224 bytes, then
+   it must set up a stack frame just like routines that call other routines."
+*/
+#define PPC_RED_ZONE_SIZE 224
+
+/* Push the stack of every thread in GC_threads that is not FINISHED.	*/
+/* For the calling thread the low end is GC_approx_sp().  For any	*/
+/* other thread the register state is fetched with thread_get_state:	*/
+/* the low end is r1 minus PPC_RED_ZONE_SIZE, and r0 and r2..r31 are	*/
+/* pushed individually with GC_push_one.  The high end is		*/
+/* GC_stackbottom for the MAIN_THREAD and stack_end otherwise.		*/
+/* Calls GC_thr_init() first if that has not happened yet, and aborts	*/
+/* if thread_get_state fails.						*/
+void GC_push_all_stacks() {
+    int i;
+    kern_return_t r;
+    GC_thread p;
+    pthread_t me;
+    ptr_t lo, hi;
+#	if defined(POWERPC)
+        ppc_thread_state_t state;
+#	else
+#		error FIXME for non-ppc OS X
+#	endif
+    mach_msg_type_number_t thread_state_count;
+    
+    me = pthread_self();
+    if (!GC_thr_initialized) GC_thr_init();
+    
+    for(i=0;i<THREAD_TABLE_SZ;i++) {
+        for(p=GC_threads[i];p!=0;p=p->next) {
+            if(p -> flags & FINISHED) continue;
+            if(pthread_equal(p->id,me)) {
+                lo = GC_approx_sp();
+            } else {
+                /* The count is an in/out argument: on entry it gives	*/
+                /* the capacity of "state", on return the amount that	*/
+                /* was filled in.  Reset it before every call.		*/
+                thread_state_count = MACHINE_THREAD_STATE_COUNT;
+                /* Get the thread state (registers, etc) */
+                r = thread_get_state(
+                    p->stop_info.mach_thread,
+                    MACHINE_THREAD_STATE,
+                    (natural_t*)&state,
+                    &thread_state_count);
+                if(r != KERN_SUCCESS) ABORT("thread_get_state failed");
+    
+                #ifdef POWERPC
+                    /* r1 is used as the stack pointer; include the	*/
+                    /* red zone beneath it.				*/
+                    lo = (void*)(state.r1 - PPC_RED_ZONE_SIZE);
+                    
+                    GC_push_one(state.r0); 
+                    GC_push_one(state.r2); 
+                    GC_push_one(state.r3); 
+                    GC_push_one(state.r4); 
+                    GC_push_one(state.r5); 
+                    GC_push_one(state.r6); 
+                    GC_push_one(state.r7); 
+                    GC_push_one(state.r8); 
+                    GC_push_one(state.r9); 
+                    GC_push_one(state.r10); 
+                    GC_push_one(state.r11); 
+                    GC_push_one(state.r12); 
+                    GC_push_one(state.r13); 
+                    GC_push_one(state.r14); 
+                    GC_push_one(state.r15); 
+                    GC_push_one(state.r16); 
+                    GC_push_one(state.r17); 
+                    GC_push_one(state.r18); 
+                    GC_push_one(state.r19); 
+                    GC_push_one(state.r20); 
+                    GC_push_one(state.r21); 
+                    GC_push_one(state.r22); 
+                    GC_push_one(state.r23); 
+                    GC_push_one(state.r24); 
+                    GC_push_one(state.r25); 
+                    GC_push_one(state.r26); 
+                    GC_push_one(state.r27); 
+                    GC_push_one(state.r28); 
+                    GC_push_one(state.r29); 
+                    GC_push_one(state.r30); 
+                    GC_push_one(state.r31);
+                #else
+                #	error FIXME for non-PPC darwin
+                #endif /* !POWERPC */
+            } /* p != me */
+            if(p->flags & MAIN_THREAD)
+                hi = GC_stackbottom;
+            else
+                hi = p->stack_end;
+            #if DEBUG_THREADS
+                GC_printf3("Darwin: Stack for thread 0x%lx = [%lx,%lx)\n",
+                    (unsigned long) p -> id,
+                    (unsigned long) lo,
+                    (unsigned long) hi
+                );
+            #endif
+            GC_push_all_stack(lo,hi);
+        } /* for(p=GC_threads[i]...) */
+    } /* for(i=0;i<THREAD_TABLE_SZ...) */
+}
+
+/* Darwin variant: call thread_suspend on every thread in GC_threads
+ * except the caller, threads marked FINISHED and threads with
+ * thread_blocked set; aborts if a suspend fails.  With PARALLEL_MARK the
+ * mark lock is held from before the loop until just before returning.
+ * With MPROTECT_VDB, GC_mprotect_stop() is called after the loop when
+ * GC_incremental is set.
+ * Caller holds allocation lock.	*/
+void GC_stop_world()
+{
+    int i;
+    GC_thread p;
+    pthread_t my_thread = pthread_self();
+    kern_return_t kern_result;
+    
+    #if DEBUG_THREADS
+    GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
+    #endif
+       
+    /* Make sure all free list construction has stopped before we start. */
+    /* No new construction can start, since free list construction is	*/
+    /* required to acquire and release the GC lock before it starts,	*/
+    /* and we have the lock.						*/
+#   ifdef PARALLEL_MARK
+      GC_acquire_mark_lock();
+      GC_ASSERT(GC_fl_builder_count == 0);
+      /* We should have previously waited for it to become zero. */
+#   endif /* PARALLEL_MARK */
+
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+        for (p = GC_threads[i]; p != 0; p = p -> next) {
+            if (p -> id == my_thread) continue;
+            if (p -> flags & FINISHED) continue;
+            if (p -> thread_blocked) /* Will wait */ continue;
+            
+            #if DEBUG_THREADS
+            GC_printf1("Suspending thread 0x%lx\n", p -> id);
+            #endif
+            
+            /* Suspend the thread */
+            kern_result = thread_suspend(p->stop_info.mach_thread);
+            if(kern_result != KERN_SUCCESS) ABORT("thread_suspend failed");
+            
+            /* This is only needed if we are modifying the threads 
+               state. thread_abort_safely should also be used
+               if this code is ever added in again.
+               
+               kern_result = thread_abort(p->stop_info.mach_thread);
+               if(kern_result != KERN_SUCCESS)
+                   ABORT("thread_abort failed (%ul)",kern_result);
+            */
+        }
+    }
+    
+#   ifdef MPROTECT_VDB
+    if(GC_incremental) {
+        extern void GC_mprotect_stop();
+        GC_mprotect_stop();
+    }
+#   endif
+    
+#   ifdef PARALLEL_MARK
+      GC_release_mark_lock();
+#   endif
+    #if DEBUG_THREADS
+      GC_printf1("World stopped from 0x%lx\n", pthread_self());
+    #endif
+}
+
+/* Darwin variant: call thread_resume on every thread in GC_threads	*/
+/* except the caller, threads marked FINISHED and threads with		*/
+/* thread_blocked set; aborts if a resume fails.  With MPROTECT_VDB,	*/
+/* GC_mprotect_resume() is called first when GC_incremental is set.	*/
+/* Caller holds allocation lock, and has held it continuously since	*/
+/* the world stopped.							*/
+void GC_start_world()
+{
+    pthread_t self = pthread_self();
+    kern_return_t kr;
+    GC_thread t;
+    int bucket;
+
+#   if DEBUG_THREADS
+      GC_printf0("World starting\n");
+#   endif
+
+#   ifdef MPROTECT_VDB
+    if(GC_incremental) {
+        extern void GC_mprotect_resume();
+        GC_mprotect_resume();
+    }
+#   endif
+
+    for (bucket = 0; bucket < THREAD_TABLE_SZ; bucket++) {
+        for (t = GC_threads[bucket]; t != 0; t = t -> next) {
+            /* Same three exclusions that GC_stop_world applies.	*/
+            if (t -> id == self
+                || (t -> flags & FINISHED) != 0
+                || t -> thread_blocked) {
+                continue;
+            }
+
+#           if DEBUG_THREADS
+              GC_printf1("Resuming 0x%lx\n", t -> id);
+#           endif
+
+            /* Resume the thread */
+            kr = thread_resume(t->stop_info.mach_thread);
+            if (KERN_SUCCESS != kr) ABORT("thread_resume failed");
+        }
+    }
+#   if DEBUG_THREADS
+      GC_printf0("World started\n");
+#   endif
+}
+
+void GC_stop_init() {
+    /* Empty: this function does nothing. */
+}
+
+#endif
diff --git a/boehm-gc/depcomp b/boehm-gc/depcomp
new file mode 100755
index 00000000000..3480ce4e96d
--- /dev/null
+++ b/boehm-gc/depcomp
@@ -0,0 +1,436 @@
+#! /bin/sh
+
+# depcomp - compile a program generating dependencies as side-effects
+# Copyright 1999, 2000 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <oliva@dcc.unicamp.br>.
+
+# Refuse to run unless depmode, source and object are all non-empty.
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+  echo "depcomp: Variables source, object and depmode must be set" 1>&2
+  exit 1
+fi
+# `libtool' can also be set to `yes' or `no'.
+
+# When depfile is not supplied, build a default from $object:
+#   base = $object without any leading path, with `P' inserted in front
+#          of the extension (foo.o -> foo.Po);
+#   dir  = $object up to and including its first `/', or empty when
+#          $object contains no `/'.
+if test -z "$depfile"; then
+   base=`echo "$object" | sed -e 's,^.*/,,' -e 's,\.\([^.]*\)$,.P\1,'`
+   dir=`echo "$object" | sed 's,/.*$,/,'`
+   if test "$dir" = "$object"; then
+      dir=
+   fi
+   # FIXME: should be _deps on DOS.
+   depfile="$dir.deps/$base"
+fi
+
+# Unless the caller set tmpdepfile, derive it from depfile by inserting
+# `T' in front of the extension (foo.Po -> foo.TPo).
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+# Start from a clean slate: drop any stale temporary dependency file.
+rm -f "$tmpdepfile"
+
+# Some modes work just like other modes, but use different flags.  We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write.  Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+  # HP compiler uses -M and no extra arg.
+  gccflag=-M
+  depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+   # This is just like dashmstdout with a different argument.
+   dashmflag=-xM
+   depmode=dashmstdout
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want.  Yay!  Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff.  Hmm.
+## The compile command ("$@") is run once with the dependency options
+## appended; the dependencies land in $tmpdepfile as a side effect.
+  "$@" -MT "$object" -MD -MP -MF "$tmpdepfile"
+  stat=$?
+## On failure, discard the temporary file and exit with the compiler's
+## status.
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+## On success the temporary file becomes the real dependency file.
+  mv "$tmpdepfile" "$depfile"
+  ;;
+
+gcc)
+## There are various ways to get dependency output from gcc.  Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+##   up in a subdir.  Having to rename by hand is ugly.
+##   (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+##   -MM, not -M (despite what the docs say).
+## - Using -M directly means running the compiler twice (even worse
+##   than renaming).
+## gccflag is already set to -M when the requested depmode was `hp'
+## (see the aliasing before the case statement); otherwise default to
+## `-MD,' so that the preprocessor receives -MD,<tmpdepfile>.
+  if test -z "$gccflag"; then
+    gccflag=-MD,
+  fi
+  "$@" -Wp,"$gccflag$tmpdepfile"
+  stat=$?
+## On failure, discard the temporary file and exit with the compiler's
+## status.
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+## First part of the output: a rule for $object, with the target that
+## the compiler wrote replaced by a single leading space.
+  echo "$object : \\" > "$depfile"
+  alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
+## The second -e expression handles DOS-style file names with drive letters.
+  sed -e 's/^[^:]*: / /' \
+      -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the `deleted header file' problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header).  We avoid this by adding
+## dummy dependencies for each header file.  Too bad gcc doesn't do
+## this for us directly.
+## (The tr below turns every space into a newline, i.e. one word per
+## line; the quoted argument deliberately spans two lines.)
+  tr ' ' '
+' < "$tmpdepfile" |
+## Some versions of gcc put a space before the `:'.  On the theory
+## that the space means something, we add a space to the output as
+## well.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+hp)
+  # This case exists only to let depend.m4 do its work.  It works by
+  # looking at the text of this script.  This case will never be run,
+  # since it is checked for above.
+  exit 1
+  ;;
+
+sgi)
+  # Run the compile command with -MDupdate so that the dependencies are
+  # written to $tmpdepfile as a side effect.  Under libtool the option
+  # and its argument are passed as a single -Wp, word instead
+  # (presumably so that libtool forwards them untouched).
+  if test "$libtool" = yes; then
+    "$@" "-Wp,-MDupdate,$tmpdepfile"
+  else
+    "$@" -MDupdate "$tmpdepfile"
+  fi
+  stat=$?
+  # On failure, discard the temporary file and exit with the compiler's
+  # status.
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+  rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the sourcefile depends on other files
+    echo "$object : \\" > "$depfile"
+
+    # Clip off the initial element (the dependent).  Don't try to be
+    # clever and replace this with sed code, as IRIX sed won't handle
+    # lines with more than a fixed number of characters (4096 in
+    # IRIX 6.2 sed, 8192 in IRIX 6.5).  We also remove comment lines;
+    # the IRIX cc adds comments like `#:fec' to the end of the
+    # dependency line.
+    # ($depfile is quoted in every redirection below so that a path
+    # containing whitespace or glob characters is not split or expanded.)
+    tr ' ' '
+' < "$tmpdepfile" \
+    | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
+    tr '
+' ' ' >> "$depfile"
+    echo >> "$depfile"
+
+    # The second pass generates a dummy entry for each header file.
+    tr ' ' '
+' < "$tmpdepfile" \
+   | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+   >> "$depfile"
+  else
+    # The sourcefile does not contain any dependencies, so just
+    # store a dummy comment line, to avoid errors with the Makefile
+    # "include basename.Plo" scheme.
+    echo "#dummy" > "$depfile"
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+aix)
+  # The C for AIX Compiler uses -M and outputs the dependencies
+  # in a .u file.  This file always lives in the current directory.
+  # Also, the AIX compiler puts `$object:' at the start of each line;
+  # $object doesn't have directory information.
+  # `stripped' is $object without its directory part and without its
+  # last extension; it names both the .u file and the .o target that
+  # appears inside it.  Note that tmpdepfile is overridden here.
+  stripped=`echo "$object" | sed -e 's,^.*/,,' -e 's/\(.*\)\..*$/\1/'`
+  tmpdepfile="$stripped.u"
+  outname="$stripped.o"
+  # Under libtool the flag is passed as -Wc,-M rather than plain -M.
+  if test "$libtool" = yes; then
+    "$@" -Wc,-M
+  else
+    "$@" -M
+  fi
+
+  stat=$?
+  # On failure, discard the .u file and exit with the compiler's status.
+  if test $stat -eq 0; then :
+  else
+    rm -f "$tmpdepfile"
+    exit $stat
+  fi
+
+  if test -f "$tmpdepfile"; then
+    # Each line is of the form `foo.o: dependent.h'.
+    # Do two passes, one to just change these to
+    # `$object: dependent.h' and one to simply `dependent.h:'.
+    sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile"
+    sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
+  else
+    # The sourcefile does not contain any dependencies, so just
+    # store a dummy comment line, to avoid errors with the Makefile
+    # "include basename.Plo" scheme.
+    echo "#dummy" > "$depfile"
+  fi
+  rm -f "$tmpdepfile"
+  ;;
+
+tru64)
+   # The Tru64 compiler uses -MD to generate dependencies as a side
+   # effect.  `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
+   # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+   # dependencies in `foo.d' instead, so we check for that too.
+   # Subdirectories are respected.
+
+   # `base' is $object with a trailing .o or .lo removed; the two
+   # candidate dependency file names are built from it.
+   base=`echo "$object" | sed -e 's/\.o$//' -e 's/\.lo$//'`
+   tmpdepfile1="$base.o.d"
+   tmpdepfile2="$base.d"
+   # Under libtool the flag is passed as -Wc,-MD rather than plain -MD.
+   if test "$libtool" = yes; then
+      "$@" -Wc,-MD
+   else
+      "$@" -MD
+   fi
+
+   stat=$?
+   # On failure, remove both candidates and exit with the compiler's
+   # status.
+   if test $stat -eq 0; then :
+   else
+      rm -f "$tmpdepfile1" "$tmpdepfile2"
+      exit $stat
+   fi
+
+   # Prefer foo.o.d when it exists, otherwise fall back to foo.d.
+   if test -f "$tmpdepfile1"; then
+      tmpdepfile="$tmpdepfile1"
+   else
+      tmpdepfile="$tmpdepfile2"
+   fi
+   if test -f "$tmpdepfile"; then
+      # First pass: rewrite the target of every line to $object.
+      # Second pass: drop the target and append `:' so that each
+      # dependency also becomes a target of its own.
+      sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
+      # That's a space and a tab in the [].
+      sed -e 's,^.*\.[a-z]*:[ 	]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+   else
+      # No dependency file was produced: store a dummy comment line.
+      echo "#dummy" > "$depfile"
+   fi
+   rm -f "$tmpdepfile"
+   ;;
+
+#nosideeffect)
+  # This comment above is used by automake to tell side-effect
+  # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o,
+  # because we must use -o when running libtool.
+  # dashmflag is already -xM when the requested depmode was
+  # `dashXmstdout'; otherwise it defaults to -M.
+  test -z "$dashmflag" && dashmflag=-M
+  # The dependency run happens in a background subshell while the real
+  # compile runs in the foreground below.
+  ( IFS=" "
+    case " $* " in
+    *" --mode=compile "*) # this is libtool, let us make it quiet
+      # Rebuild the argument list in place: each pass appends the current
+      # argument to the end and shifts the original copy off the front.
+      for arg
+      do # cycle over the arguments
+        case "$arg" in
+	"--mode=compile")
+	  # insert --quiet before "--mode=compile"
+	  set fnord "$@" --quiet
+	  shift # fnord
+	  ;;
+	esac
+	set fnord "$@" "$arg"
+	shift # fnord
+	shift # "$arg"
+      done
+      ;;
+    esac
+    # Replace everything up to the first `:' (the target the compiler
+    # printed) and the following blanks with "$object: ".
+    "$@" $dashmflag | sed 's:^[^:]*\:[ 	]*:'"$object"'\: :' > "$tmpdepfile"
+  ) &
+  proc=$!
+  "$@"
+  stat=$?
+  wait "$proc"
+  # Only the exit status of the real compile decides success.
+  if test "$stat" != 0; then exit $stat; fi
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Append one dummy `file :' rule per dependency (one word per line
+  # after the tr; the quoted tr argument deliberately spans two lines).
+  tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+dashXmstdout)
+  # This case only exists to satisfy depend.m4.  It is never actually
+  # run, as this mode is specially recognized in the preamble.
+  exit 1
+  ;;
+
+makedepend)
+  # X makedepend
+  # The dependency run happens in a background subshell while the real
+  # compile runs in the foreground below.
+  (
+    # Drop the first argument, then rebuild the argument list keeping
+    # only -D*/-I* options and non-option arguments; every other option
+    # is discarded.
+    shift
+    cleared=no
+    for arg in "$@"; do
+      # On the first iteration, empty the positional parameters; the
+      # loop already holds its own expansion of the old list.
+      case $cleared in no)
+        set ""; shift
+	cleared=yes
+      esac
+      case "$arg" in
+        -D*|-I*)
+	  set fnord "$@" "$arg"; shift;;
+	-*)
+	  ;;
+	*)
+	  set fnord "$@" "$arg"; shift;;
+      esac
+    done
+    # Object suffix including the dot (e.g. `.o' or `.lo').  $object is
+    # quoted so that whitespace or glob characters in the path are
+    # neither split nor expanded.
+    obj_suffix=`echo "$object" | sed 's/^.*\././'`
+    touch "$tmpdepfile"
+    ${MAKEDEPEND-makedepend} 2>/dev/null -o"$obj_suffix" -f"$tmpdepfile" "$@"
+  ) &
+  proc=$!
+  "$@"
+  stat=$?
+  wait "$proc"
+  # Only the exit status of the real compile decides success.
+  if test "$stat" != 0; then exit $stat; fi
+  rm -f "$depfile"
+  cat < "$tmpdepfile" > "$depfile"
+  # Skip the first two lines of the makedepend output (presumably its
+  # header), then append one dummy `file :' rule per dependency.  The
+  # quoted tr argument deliberately spans two lines.
+  sed '1,2d' "$tmpdepfile" | tr ' ' '
+' | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly.  Breaking it into two sed invocations is a workaround.
+    sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile" "$tmpdepfile".bak
+  ;;
+
+cpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o,
+  # because we must use -o when running libtool.
+  # The dependency run happens in a background subshell while the real
+  # compile runs in the foreground below.
+  ( IFS=" "
+    case " $* " in
+    *" --mode=compile "*)
+      # Rebuild the argument list in place: each pass appends the current
+      # argument to the end and shifts the original copy off the front.
+      for arg
+      do # cycle over the arguments
+        case $arg in
+	"--mode=compile")
+	  # insert --quiet before "--mode=compile"
+	  set fnord "$@" --quiet
+	  shift # fnord
+	  ;;
+	esac
+	set fnord "$@" "$arg"
+	shift # fnord
+	shift # "$arg"
+      done
+      ;;
+    esac
+    # Preprocess only, keep the file name of every `# <number> "file"'
+    # line marker formatted as ` file \', and finally strip the trailing
+    # ` \' from the last line.
+    "$@" -E |
+    sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
+    sed '$ s: \\$::' > "$tmpdepfile"
+  ) &
+  proc=$!
+  "$@"
+  stat=$?
+  wait "$proc"
+  # Only the exit status of the real compile decides success.
+  if test "$stat" != 0; then exit $stat; fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  cat < "$tmpdepfile" >> "$depfile"
+  # Append one dummy `file :' rule per dependency.
+  sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+msvisualcpp)
+  # Important note: in order to support this mode, a compiler *must*
+  # always write the preprocessed file to stdout, regardless of -o,
+  # because we must use -o when running libtool.
+  # The dependency run happens in a background subshell while the real
+  # compile runs in the foreground below.
+  ( IFS=" "
+    case " $* " in
+    *" --mode=compile "*)
+      # Rebuild the argument list in place: each pass appends the current
+      # argument to the end and shifts the original copy off the front.
+      for arg
+      do # cycle over the arguments
+        case $arg in
+	"--mode=compile")
+	  # insert --quiet before "--mode=compile"
+	  set fnord "$@" --quiet
+	  shift # fnord
+	  ;;
+	esac
+	set fnord "$@" "$arg"
+	shift # fnord
+	shift # "$arg"
+      done
+      ;;
+    esac
+    # Second pass over the arguments: drop -Gm, -Gi and -ZI (and their
+    # `/' spellings) and keep everything else in the original order.
+    for arg
+    do
+      case "$arg" in
+      "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+	set fnord "$@"
+	shift
+	shift
+	;;
+      *)
+	set fnord "$@" "$arg"
+	shift
+	shift
+	;;
+      esac
+    done
+    # Preprocess only and turn every `#line <number> "file"' marker into
+    # an `echo "`cygpath -u "file"`"' command; the sorted, de-duplicated
+    # commands are stored in $tmpdepfile and executed further below.
+    "$@" -E |
+    sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
+  ) &
+  proc=$!
+  "$@"
+  stat=$?
+  wait "$proc"
+  # Only the exit status of the real compile decides success.
+  if test "$stat" != 0; then exit $stat; fi
+  rm -f "$depfile"
+  echo "$object : \\" > "$depfile"
+  # Source $tmpdepfile so that its echo/cygpath commands run.  The first
+  # run emits the prerequisites (spaces escaped, tab-indented, with a
+  # trailing backslash); after a line holding only a tab, the second run
+  # emits one dummy `file:' rule per dependency.
+  . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::	\1 \\:p' >> "$depfile"
+  echo "	" >> "$depfile"
+  . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+  rm -f "$tmpdepfile"
+  ;;
+
+none)
+  # No dependency tracking: replace this shell with the compile command,
+  # so nothing after this line runs.
+  exec "$@"
+  ;;
+
+*)
+  # Any depmode not matched above is rejected.
+  echo "Unknown depmode $depmode" 1>&2
+  exit 1
+  ;;
+esac
+
+exit 0
diff --git a/boehm-gc/doc/Makefile.am b/boehm-gc/doc/Makefile.am
new file mode 100644
index 00000000000..91446305581
--- /dev/null
+++ b/boehm-gc/doc/Makefile.am
@@ -0,0 +1,27 @@
+# 
+# 
+# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+# OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+# 
+# Permission is hereby granted to use or copy this program
+# for any purpose,  provided the above notices are retained on all copies.
+# Permission to modify the code and to distribute modified code is granted,
+# provided the above notices are retained, and a notice that the code was
+# modified is included with the above copyright notice.
+#
+# Modified by: Grzegorz Jakacki <jakacki at acm dot org>
+
+## Process this file with automake to produce Makefile.in.
+
+# installed documentation
+#
+dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \
+    gcdescr.html README README.amiga README.arm.cross \
+    README.autoconf README.changes README.contributors \
+    README.cords README.DGUX386 README.dj README.environment \
+    README.ews4800 README.hp README.linux README.Mac \
+    README.MacOSX README.macros README.OS2 README.rs6000 \
+    README.sgi README.solaris2 README.uts README.win32 \
+    tree.html leak.html gcinterface.html scale.html \
+    README.darwin
+
diff --git a/boehm-gc/doc/Makefile.in b/boehm-gc/doc/Makefile.in
new file mode 100644
index 00000000000..9bf1ff5fead
--- /dev/null
+++ b/boehm-gc/doc/Makefile.in
@@ -0,0 +1,282 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# 
+# 
+# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+# OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+# 
+# Permission is hereby granted to use or copy this program
+# for any purpose,  provided the above notices are retained on all copies.
+# Permission to modify the code and to distribute modified code is granted,
+# provided the above notices are retained, and a notice that the code was
+# modified is included with the above copyright notice.
+#
+# Modified by: Grzegorz Jakacki <jakacki at acm dot org>
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASFLAGS = @CCASFLAGS@
+CFLAGS = @CFLAGS@
+CXX = @CXX@
+CXXFLAGS = @CXXFLAGS@
+CXXINCLUDES = @CXXINCLUDES@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+EXTRA_TEST_LIBS = @EXTRA_TEST_LIBS@
+GC_CFLAGS = @GC_CFLAGS@
+GC_VERSION = @GC_VERSION@
+INCLUDES = @INCLUDES@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+MAINT = @MAINT@
+MY_CFLAGS = @MY_CFLAGS@
+OBJDUMP = @OBJDUMP@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+STRIP = @STRIP@
+THREADLIBS = @THREADLIBS@
+VERSION = @VERSION@
+addincludes = @addincludes@
+addlibs = @addlibs@
+addobjs = @addobjs@
+addtests = @addtests@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+target_all = @target_all@
+
+# installed documentation
+#
+dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \
+    gcdescr.html README README.amiga README.arm.cross \
+    README.autoconf README.changes README.contributors \
+    README.cords README.DGUX386 README.dj README.environment \
+    README.ews4800 README.hp README.linux README.Mac \
+    README.MacOSX README.macros README.OS2 README.rs6000 \
+    README.sgi README.solaris2 README.uts README.win32 \
+    tree.html leak.html gcinterface.html scale.html \
+    README.darwin
+
+subdir = doc
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+DATA = $(dist_pkgdata_DATA)
+
+DIST_COMMON = README $(dist_pkgdata_DATA) Makefile.am Makefile.in
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am  $(top_srcdir)/configure.in $(ACLOCAL_M4)
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  doc/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+dist_pkgdataDATA_INSTALL = $(INSTALL_DATA)
+install-dist_pkgdataDATA: $(dist_pkgdata_DATA)
+	@$(NORMAL_INSTALL)
+	$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
+	@list='$(dist_pkgdata_DATA)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f"; \
+	  $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f; \
+	done
+
+uninstall-dist_pkgdataDATA:
+	@$(NORMAL_UNINSTALL)
+	@list='$(dist_pkgdata_DATA)'; for p in $$list; do \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " rm -f $(DESTDIR)$(pkgdatadir)/$$f"; \
+	  rm -f $(DESTDIR)$(pkgdatadir)/$$f; \
+	done
+tags: TAGS
+TAGS:
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+	@list='$(DISTFILES)'; for file in $$list; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkinstalldirs) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(DATA)
+
+installdirs:
+	$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-generic distclean-libtool
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-dist_pkgdataDATA
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-dist_pkgdataDATA uninstall-info-am
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+	distclean distclean-generic distclean-libtool distdir dvi \
+	dvi-am info info-am install install-am install-data \
+	install-data-am install-dist_pkgdataDATA install-exec \
+	install-exec-am install-info install-info-am install-man \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic mostlyclean-libtool uninstall uninstall-am \
+	uninstall-dist_pkgdataDATA uninstall-info-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/boehm-gc/doc/README.DGUX386 b/boehm-gc/doc/README.DGUX386
new file mode 100644
index 00000000000..9d6d84788ef
--- /dev/null
+++ b/boehm-gc/doc/README.DGUX386
@@ -0,0 +1,215 @@
+    Garbage Collector (parallel version) for ix86 DG/UX Release R4.20MU07
+
+
+     *READ* the file README.QUICK.
+
+     You need the GCC-3.0.3 rev (DG/UX) compiler to build this tree.
+     This compiler has the new "dgux386" threads package implemented.
+     It also supports the switch "-pthread" needed to link correctly
+     the DG/UX's -lrte -lthread with -lgcc and the system's -lc. 
+     Finally, we support parallel mark for the SMP DG/UX machines.
+     To build the garbage collector do:
+      
+      ./configure --enable-parallel-mark
+      make
+      make gctest
+
+     Before you run "gctest" you need to set your LD_LIBRARY_PATH
+     correctly so that "gctest" can find the shared library libgc.
+     Alternatively you can do a configuration
+
+      ./configure --enable-parallel-mark --disable-shared
+  
+     to build only the static version of libgc.
+  
+     To enable debugging messages please do:
+     1) Add the "--enable-full-debug" flag during configuration. 
+     2) Edit the file linux-threads.c and uncomment the line:
+
+     /* #define DEBUG_THREADS 1 */ to ---> 
+
+     #define DEBUG_THREADS 1
+
+     Then give "make" as usual.
+    
+     In a machine with 4 CPUs (my own machine) the option parallel
+     mark (aka --enable-parallel-mark) makes a BIG difference.
+
+     Takis Psarogiannakopoulos
+     University of Cambridge
+     Centre for Mathematical Sciences
+     Department of Pure Mathematics
+     Wilberforce Road
+     Cambridge CB3 0WB ,UK , <takis@XFree86.Org>
+     January 2002
+
+
+Note (HB):
+     The integration of this patch is currently not complete.
+     The following patches against 6.1alpha3 were hard to move
+     to alpha4, and are not integrated.  There may also be minor
+     problems with stylistic corrections made by me.
+
+
+--- ltconfig.ORIG	Mon Jan 28 20:22:18 2002
++++ ltconfig	Mon Jan 28 20:44:00 2002
+@@ -689,6 +689,11 @@
+        pic_flag=-Kconform_pic
+     fi
+     ;;
++  dgux*)
++    pic_flag='-fPIC'
++    link_static='-Bstatic'
++    wl='-Wl,'
++    ;;
+   *)
+     pic_flag='-fPIC'
+     ;;
+@@ -718,6 +723,12 @@
+     # We can build DLLs from non-PIC.
+     ;;
+ 
++  dgux*)
++    pic_flag='-KPIC'
++    link_static='-Bstatic'
++    wl='-Wl,'
++    ;;
++
+   osf3* | osf4* | osf5*)
+     # All OSF/1 code is PIC.
+     wl='-Wl,'
+@@ -1154,6 +1165,22 @@
+     fi
+     ;;
+ 
++  dgux*)
++    ld_shlibs=yes
++    # For both C/C++ ommit the deplibs. This is because we relying on the fact
++    # that compilation of execitables will put them in correct order
++    # in any case and sometimes are wrong when listed as deplibs (or missing some deplibs)
++    # However when GNU ld and --whole-archive needs to be used we have the problem
++    # that if the -fPIC *_s.a archive is linked through deplibs list we ommiting crucial
++    # .lo/.o files from the created shared lib. This I think is not the case here.
++    archive_cmds='$CC -shared -h $soname -o $lib $libobjs $linkopts'
++    thread_safe_flag_spec='-pthread'
++    wlarc=
++    hardcode_libdir_flag_spec='-L$libdir'
++    hardcode_shlibpath_var=no
++    ac_cv_archive_cmds_needs_lc=no
++    ;;
++
+   cygwin* | mingw*)
+     # hardcode_libdir_flag_spec is actually meaningless, as there is
+     # no search path for DLLs.
+@@ -1497,7 +1524,7 @@
+     ;;
+ 
+   dgux*)
+-    archive_cmds='$LD -G -h $soname -o $lib $libobjs $deplibs $linkopts'
++    archive_cmds='$CC -shared -h $soname -o $lib $libobjs $linkopts'
+     hardcode_libdir_flag_spec='-L$libdir'
+     hardcode_shlibpath_var=no
+     ;;
+@@ -2092,12 +2119,17 @@
+   ;;
+ 
+ dgux*)
+-  version_type=linux
++  version_type=dgux
+   need_lib_prefix=no
+   need_version=no
+-  library_names_spec='${libname}${release}.so$versuffix ${libname}${release}.so$major $libname.so'
+-  soname_spec='${libname}${release}.so$major'
++  library_names_spec='$libname.so$versuffix'
++  soname_spec='$libname.so$versuffix'
+   shlibpath_var=LD_LIBRARY_PATH
++  thread_safe_flag_spec='-pthread'
++  wlarc=
++  hardcode_libdir_flag_spec='-L$libdir'
++  hardcode_shlibpath_var=no
++  ac_cv_archive_cmds_needs_lc=no
+   ;;
+ 
+ sysv4*MP*)
+
+
+--- ltmain.sh.ORIG	Mon Jan 28 20:31:18 2002
++++ ltmain.sh	Tue Jan 29 00:11:29 2002
+@@ -1072,11 +1072,38 @@
+ 	esac
+ 	;;
+ 
++      -thread*)
++	# DG/UX GCC 2.95.x, 3.x.x rev (DG/UX) links -lthread
++	# with the switch -threads
++	if test "$arg" = "-threads"; then
++	  case "$host" in
++	  i[3456]86-*-dgux*)
++	    deplibs="$deplibs $arg"
++	    continue
++	    ;;
++	  esac
++	fi
++	;;
++
++      -pthread*)
++	# DG/UX GCC 2.95.x, 3.x.x rev (DG/UX) links -lthread
++	# with the switch -pthread
++	if test "$arg" = "-pthread"; then
++	  case "$host" in
++	  i[3456]86-*-dgux*)
++	    deplibs="$deplibs $arg"
++	    continue
++	    ;;
++	  esac
++	fi
++	;;
++
+       -l*)
+ 	if test "$arg" = "-lc"; then
+ 	  case "$host" in
+-	  *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
++	  *-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos* | i[3456]86-*-dgux*)
+ 	    # These systems don't actually have c library (as such)
++	    # It is wrong in DG/UX to add -lc when creating shared/dynamic objs/libs
+ 	    continue
+ 	    ;;
+ 	  esac
+@@ -1248,6 +1275,12 @@
+ 	  temp_deplibs=
+ 	  for deplib in $dependency_libs; do
+ 	    case "$deplib" in
++	    -thread*)
++		 temp_deplibs="$temp_deplibs $deplib"
++		 ;;
++	    -pthread)
++		 temp_deplibs="$temp_deplibs $deplib"
++		 ;;
+ 	    -R*) temp_xrpath=`$echo "X$deplib" | $Xsed -e 's/^-R//'`
+ 		 case " $rpath $xrpath " in
+ 		 *" $temp_xrpath "*) ;;
+@@ -1709,6 +1742,13 @@
+ 	  done
+ 	  ;;
+ 
++	dgux)
++	  # Leave mostly blank for DG/UX
++	  major=
++	  versuffix=".$current.$revision";
++	  verstring=
++	  ;;
++
+ 	linux)
+ 	  major=.`expr $current - $age`
+ 	  versuffix="$major.$age.$revision"
+@@ -1792,8 +1832,9 @@
+ 
+ 	dependency_libs="$deplibs"
+ 	case "$host" in
+-	*-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
++	*-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos* | i[3456]86-*-dgux*)
+ 	  # these systems don't actually have a c library (as such)!
++	  # It is wrong in DG/UX to add -lc when creating shared/dynamic objs/libs
+ 	  ;;
+ 	*)
+ 	  # Add libc to deplibs on all other systems.
diff --git a/boehm-gc/doc/README.arm.cross b/boehm-gc/doc/README.arm.cross
new file mode 100644
index 00000000000..96744edaf67
--- /dev/null
+++ b/boehm-gc/doc/README.arm.cross
@@ -0,0 +1,68 @@
+From: Margaret Fleck
+
+Here's the key details of what worked for me, in case anyone else needs them.
+There may well be better ways to do some of this, but ....
+   -- Margaret
+
+
+The badge4 has a StrongArm-1110 processor and a StrongArm-1111 coprocessor.  
+
+Assume that the garbage collector distribution is unpacked into /home/arm/gc6.0,
+which is visible to both the ARM machine and a linux desktop (e.g. via NFS mounting).
+
+Assume that you have a file /home/arm/config.site with contents something like the
+example attached below.  Notice that our local ARM toolchain lives in
+/skiff/local.
+
+Go to /home/arm/gc6.0 directory.  Do
+  CONFIG_SITE=/home/arm/config.site ./configure --target=arm-linux
+--prefix=/home/arm/gc6.0
+
+On your desktop, do:
+   make
+   make install
+The main garbage collector library should now be in ../gc6.0/lib/libgc.so.  
+
+To test the garbage collector, first do the following on your desktop
+   make gctest
+   ./gctest
+Then do the following on the ARM machine
+   cd .libs
+   ./lt-gctest
+
+Do not try to do "make test" (the usual way of running the test
+program).  This does not work and seems to erase some of the important
+files.
+
+The gctest program claims to have succeeded.  Haven't run any further tests
+with it, though I'll be doing so in the near future.
+
+-------------------------------
+# config.site for configure
+
+# Modified from the one provided by Bradley D. LaRonde
+# Edited by Andrej Cedilnik <acedil1@csee.umbc.edu>
+# Used some of solutions by Tilman Vogel <Tilman.Vogel@web.de>
+# Ported for iPAQ Familiar by Oliver Kurth <oliver.kurth@innominate.com>
+# Further modified by Margaret Fleck for the badge4
+
+HOSTCC=gcc
+
+# Names of the cross-compilers
+CC=/skiff/local/bin/arm-linux-gcc
+CXX=/skiff/local/bin/arm-linux-gcc
+
+# The cross compiler specific options
+CFLAGS="-O2 -fno-exceptions"
+CXXFLAGS="-O2 -fno-exceptions"
+CPPFLAGS="-O2 -fno-exceptions"
+LDFLAGS=""
+
+# Some other programs
+AR=/skiff/local/bin/arm-linux-ar
+RANLIB=/skiff/local/bin/arm-linux-ranlib
+NM=/skiff/local/bin/arm-linux-nm
+ac_cv_path_NM=/skiff/local/bin/arm-linux-nm
+ac_cv_func_setpgrp_void=yes
+x_includes=/skiff/local/arm-linux/include/X11
+x_libraries=/skiff/local/arm-linux/lib/X11
diff --git a/boehm-gc/doc/README.darwin b/boehm-gc/doc/README.darwin
new file mode 100644
index 00000000000..3cd1b818b19
--- /dev/null
+++ b/boehm-gc/doc/README.darwin
@@ -0,0 +1,106 @@
+Darwin/MacOSX Support - July 22, 2003
+====================================
+
+Important Usage Notes
+=====================
+
+GC_init() MUST be called before calling any other GC functions. This 
+is necessary to properly register segments in dynamic libraries. This
+call is required even if your code does not use dynamic libraries as the
+dyld code handles registering all data segments.
+
+When your use of the garbage collector is confined to dylibs and you
+cannot call GC_init() before your libraries' static initializers have
+run and perhaps called GC_malloc(), create an initialization routine
+for each library to call GC_init():
+
+#include <gc/gc.h>
+void my_library_init() { GC_init(); }
+
+Compile this code into a my_library_init.o, and link it into your
+dylib. When you link the dylib, pass the -init argument with
+_my_library_init (e.g. gcc -dynamiclib -o my_library.dylib a.o b.o c.o
+my_library_init.o -init _my_library_init). This causes
+my_library_init() to be called before any static initializers, and
+will initialize the garbage collector properly. 
+
+Note: It doesn't hurt to call GC_init() more than once, so it's best,
+if you have an application or set of libraries that all use the
+garbage collector, to create an initialization routine for each of
+them that calls GC_init(). Better safe than sorry. 
+
+The incremental collector is still a bit flaky on darwin. It seems to 
+work reliably with workarounds for a few possible bugs in place; however,
+these workarounds may not work correctly in all cases. There may also
+be additional problems that I have not found. 
+
+Implementation Information
+==========================
+Darwin/MacOSX support is nearly complete. Thread support is reliable on 
+Darwin 6.x (MacOSX 10.2) and there have been reports of success on older
+Darwin versions (MacOSX 10.1). Shared library support has also been
+added and the gc can be run from a shared library. There is currently only
+support for Darwin/PPC although adding x86 support should be trivial.
+
+Thread support is implemented in terms of mach thread_suspend and 
+thread_resume calls. These provide a very clean interface to thread
+suspension. This implementation doesn't rely on pthread_kill so the
+code works on Darwin < 6.0 (MacOSX 10.1). All the code to stop the
+world is located in darwin_stop_world.c.
+
+The original incremental collector support unfortunately no longer works
+on recent Darwin versions. It also relied on some undocumented kernel
+structures. Mach, however, does have a very clean interface to exception
+handling. The current implementation uses Mach's exception handling.
+
+Much thanks goes to Andrew Stone, Dietmar Planitzer, Andrew Begel, 
+Jeff Sturm, and Jesse Rosenstock for all their work on the 
+Darwin/OS X port.
+
+-Brian Alliet
+brian@brianweb.net
+
+
+Older Information (Most of this no longer applies to the current code)
+======================================================================
+
+While the GC should work on MacOS X Server, MacOS X and Darwin, I only tested
+it on MacOS X Server.
+I've added a PPC assembly version of GC_push_regs(), thus the setjmp() hack is
+no longer necessary. Incremental collection is supported via mprotect/signal.
+The current solution isn't really optimal because the signal handler must decode
+the faulting PPC machine instruction in order to find the correct heap address.
+Further, it must poke around in the register state which the kernel saved away
+in some obscure register state structure before it calls the signal handler -
+needless to say the layout of this structure is nowhere documented.
+Threads and dynamic libraries are not yet supported (adding dynamic library
+support via the low-level dyld API shouldn't be that hard).
+
+The original MacOS X port was brought to you by Andrew Stone.
+
+
+June 1, 2000
+
+Dietmar Planitzer
+dave.pl@ping.at
+
+Note from Andrew Begel:
+
+One more fix to enable gc.a to link successfully into a shared library for
+MacOS X. You have to add -fno-common to the CFLAGS in the Makefile. MacOSX
+disallows common symbols in anything that eventually finds its way into a
+shared library. (I don't completely understand why, but -fno-common seems to
+work and doesn't mess up the garbage collector's functionality).
+
+Feb 26, 2003
+
+Jeff Sturm and Jesse Rosenstock provided a patch that adds thread support.
+GC_MACOSX_THREADS should be defined in the build and in clients.  Real
+dynamic library support is still missing, i.e. dynamic library data segments
+are still not scanned.  Code that stores pointers to the garbage collected
+heap in statically allocated variables should not reside in a dynamic
+library.  This still doesn't appear to be 100% reliable.  
+
+Mar 10, 2003
+Brian Alliet contributed dynamic library support for MacOSX.  It could also
+use more testing.
diff --git a/boehm-gc/doc/gcinterface.html b/boehm-gc/doc/gcinterface.html
new file mode 100644
index 00000000000..7b336ec811b
--- /dev/null
+++ b/boehm-gc/doc/gcinterface.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML>
+<HEAD>
+<TITLE>Garbage Collector Interface</TITLE>
+</HEAD>
+<BODY>
+<H1>C Interface</h1>
+On many platforms, a single-threaded garbage collector library can be built
+to act as a plug-in malloc replacement.  (Build with -DREDIRECT_MALLOC=GC_malloc
+-DIGNORE_FREE.)  This is often the best way to deal with third-party libraries
+which leak or prematurely free objects.  -DREDIRECT_MALLOC is intended
+primarily as an easy way to adapt old code, not for new development.
+<P>
+New code should use the interface discussed below.
+<P>
+Code must be linked against the GC library.  On most UNIX platforms,
+this will be gc.a.
+<P>
+The following describes the standard C interface to the garbage collector.
+It is not a complete definition of the interface.  It describes only the
+most commonly used functionality, approximately in decreasing order of
+frequency of use.  The description assumes an ANSI C compiler.
+The full interface is described in
+<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a>
+or <TT>gc.h</tt> in the distribution.
+<P>
+Clients should include gc.h.
+<P>
+In the case of multithreaded code,
+gc.h should be included after the threads header file, and
+after defining the appropriate GC_XXXX_THREADS macro.
+(For 6.2alpha4 and later, simply defining GC_THREADS should suffice.)
+Gc.h must be included
+in files that use either GC or threads primitives, since threads primitives
+will be redefined to cooperate with the GC on many platforms.
+<DL>
+<DT> <B>void * GC_MALLOC(size_t <I>nbytes</i>)</b>
+<DD>
+Allocates and clears <I>nbytes</i> of storage.
+Requires (amortized) time proportional to <I>nbytes</i>.
+The resulting object will be automatically deallocated when unreferenced.
+References from objects allocated with the system malloc are usually not
+considered by the collector.  (See GC_MALLOC_UNCOLLECTABLE, however.)
+GC_MALLOC is a macro which invokes GC_malloc by default or, if GC_DEBUG
+is defined before gc.h is included, a debugging version that checks
+occasionally for overwrite errors, and the like.
+<DT> <B>void * GC_MALLOC_ATOMIC(size_t <I>nbytes</i>)</b>
+<DD>
+Allocates <I>nbytes</i> of storage.
+Requires (amortized) time proportional to <I>nbytes</i>.
+The resulting object will be automatically deallocated when unreferenced.
+The client promises that the resulting object will never contain any pointers.
+The memory is not cleared.
+This is the preferred way to allocate strings, floating point arrays,
+bitmaps, etc.
+More precise information about pointer locations can be communicated to the
+collector using the interface in
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_typedh.txt">gc_typed.h</a> in the distribution.
+<DT> <B>void * GC_MALLOC_UNCOLLECTABLE(size_t <I>nbytes</i>)</b>
+<DD>
+Identical to GC_MALLOC, except that the resulting object is not automatically
+deallocated.  Unlike the system-provided malloc, the collector does
+scan the object for pointers to garbage-collectable memory, even if the
+block itself does not appear to be reachable.  (Objects allocated in this way
+are effectively treated as roots by the collector.)
+<DT> <B> void * GC_REALLOC(void *old, size_t new_size) </b>
+<DD>
+Allocate a new object of the indicated size and copy (a prefix of) the
+old object into the new object.  The old object is reused in place if
+convenient.  If the original object was allocated with GC_malloc_atomic,
+the new object is subject to the same constraints.  If it was allocated
+as an uncollectable object, then the new object is uncollectable, and
+the old object (if different) is deallocated.
+(Use GC_REALLOC with GC_MALLOC, etc.)
+<DT> <B> void GC_FREE(void *dead) </b>
+<DD>
+Explicitly deallocate an object.  Typically not useful for small
+collectable objects.  (Use GC_FREE with GC_MALLOC, etc.)
+<DT> <B> void * GC_MALLOC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b>
+<DD>
+<DT> <B> void * GC_MALLOC_ATOMIC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b>
+<DD>
+Analogous to GC_MALLOC and GC_MALLOC_ATOMIC, except that the client
+guarantees that as long
+as the resulting object is of use, a pointer is maintained to someplace
+inside the first 512 bytes of the object.  This pointer should be declared
+volatile to avoid interference from compiler optimizations.
+(Other nonvolatile pointers to the object may exist as well.)
+This is the
+preferred way to allocate objects that are likely to be > 100KBytes in size.
+It greatly reduces the risk that such objects will be accidentally retained
+when they are no longer needed.  Thus space usage may be significantly reduced.
+<DT> <B> void GC_gcollect(void) </b>
+<DD>
+Explicitly force a garbage collection.
+<DT> <B> void GC_enable_incremental(void) </b>
+<DD>
+Cause the garbage collector to perform a small amount of work
+every few invocations of GC_malloc or the like, instead of performing
+an entire collection at once.  This is likely to increase total
+running time.  It will improve response on a platform that either has
+suitable support in the garbage collector (Irix and most other Unix
+versions, win32 if the collector was suitably built) or if "stubborn"
+allocation is used (see <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a>).
+On many platforms this interacts poorly with system calls 
+that write to the garbage collected heap.
+<DT> <B> GC_warn_proc GC_set_warn_proc(GC_warn_proc p) </b>
+<DD>
+Replace the default procedure used by the collector to print warnings.
+The collector
+may otherwise write to stderr, most commonly because GC_malloc was used
+in a situation in which GC_malloc_ignore_off_page would have been more
+appropriate.  See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details.
+<DT> <B> void GC_register_finalizer(...) </b>
+<DD>
+Register a function to be called when an object becomes inaccessible.
+This is often useful as a backup method for releasing system resources
+(<I>e.g.</i> closing files) when the object referencing them becomes
+inaccessible.
+It is not an acceptable method to perform actions that must be performed
+in a timely fashion.
+See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details of the interface.
+See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html">here</a> for a more detailed discussion
+of the design.
+<P>
+Note that an object may become inaccessible before client code is done
+operating on its fields.  Suitable synchronization is usually required.
+See <A HREF="http://portal.acm.org/citation.cfm?doid=604131.604153">here</a>
+or <A HREF="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">here</a>
+for details.
+</dl>
+<P>
+If you are concerned with multiprocessor performance and scalability,
+you should consider enabling and using thread local allocation (<I>e.g.</i>
+GC_LOCAL_MALLOC, see <TT>gc_local_alloc.h</tt>).  If your platform
+supports it, you should build the collector with parallel marking support
+(-DPARALLEL_MARK, or --enable-parallel-mark).
+<P>
+If the collector is used in an environment in which pointer location
+information for heap objects is easily available, this can be passed on
+to the collector using the interfaces in either <TT>gc_typed.h</tt>
+or <TT>gc_gcj.h</tt>.
+<P>
+The collector distribution also includes a <B>string package</b> that takes
+advantage of the collector.  For details see
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt">cord.h</a>.
+
+<H1>C++ Interface</h1>
+There are three distinct ways to use the collector from C++:
+<DL>
+<DT> <B> STL allocators </b>
+<DD>
+Users of the <A HREF="http://www.sgi.com/tech/stl">SGI extended STL</a>
+can include <TT>new_gc_alloc.h</tt> before including
+STL header files.
+(<TT>gc_alloc.h</tt> corresponds to now obsolete versions of the
+SGI STL.)
+This defines SGI-style allocators
+<UL>
+<LI> alloc
+<LI> single_client_alloc
+<LI> gc_alloc
+<LI> single_client_gc_alloc
+</ul>
+which may be used either directly to allocate memory or to instantiate
+container templates.  The first two allocate uncollectable but traced
+memory, while the second two allocate collectable memory.
+The single_client versions are not safe for concurrent access by
+multiple threads, but are faster.
+<P>
+For an example, click <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_alloc_exC.txt">here</a>.
+<P>
+Recent versions of the collector also include a more standard-conforming
+allocator implementation in <TT>gc_allocator.h</tt>.  It defines
+<UL>
+<LI> traceable_allocator
+<LI> gc_allocator
+</ul>
+Again the former allocates uncollectable but traced memory.
+This should work with any fully standard-conforming C++ compiler.
+<DT> <B> Class inheritance based interface </b>
+<DD>
+Users may include gc_cpp.h and then cause members of certain classes to
+be allocated in garbage collectable memory by inheriting from class gc.
+For details see <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_cpph.txt">gc_cpp.h</a>.
+<DT> <B> C interface </b>
+<DD>
+It is also possible to use the C interface from 
+<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> directly.
+On platforms which use malloc to implement ::new, it should usually be possible
+to use a version of the collector that has been compiled as a malloc
+replacement.  It is also possible to replace ::new and other allocation
+functions suitably.
+<P>
+Note that user-implemented small-block allocation often works poorly with
+an underlying garbage-collected large block allocator, since the collector
+has to view all objects accessible from the user's free list as reachable.
+This is likely to cause problems if GC_malloc is used with something like
+the original HP version of STL.
+This approach works with the SGI versions of the STL only if the
+<TT>malloc_alloc</tt> allocator is used.
+</dl>
+</body>
+</html>
diff --git a/boehm-gc/doc/leak.html b/boehm-gc/doc/leak.html
new file mode 100644
index 00000000000..91fa8ea8402
--- /dev/null
+++ b/boehm-gc/doc/leak.html
@@ -0,0 +1,197 @@
+<HTML>
+<HEAD>
+<TITLE>Using the Garbage Collector as Leak Detector</title>
+</head>
+<BODY>
+<H1>Using the Garbage Collector as Leak Detector</h1>
+The garbage collector may be used as a leak detector.
+In this case, the primary function of the collector is to report
+objects that were allocated (typically with <TT>GC_MALLOC</tt>),
+not deallocated (normally with <TT>GC_FREE</tt>), but are
+no longer accessible.  Since the object is no longer accessible,
+there is normally no way to deallocate the object at a later time;
+thus it can safely be assumed that the object has been "leaked".
+<P>
+This is substantially different from counting leak detectors,
+which simply verify that all allocated objects are eventually
+deallocated.  A garbage-collector based leak detector can provide
+somewhat more precise information when an object was leaked.
+More importantly, it does not report objects that are never
+deallocated because they are part of "permanent" data structures.
+Thus it does not require all objects to be deallocated at process
+exit time, a potentially useless activity that often triggers
+large amounts of paging.
+<P>
+All non-ancient versions of the garbage collector provide
+leak detection support.  Version 5.3 adds the following
+features:
+<OL>
+<LI> Leak detection mode can be initiated at run-time by
+setting GC_find_leak instead of building the collector with FIND_LEAK
+defined.  This variable should be set to a nonzero value
+at program startup.
+<LI> Leaked objects should be reported and then correctly garbage collected.
+Prior versions either reported leaks or functioned as a garbage collector.
+</ol>
+For the rest of this description we will give instructions that work
+with any reasonable version of the collector.
+<P>
+To use the collector as a leak detector, perform the following steps:
+<OL>
+<LI> Build the collector with -DFIND_LEAK.  Otherwise use default
+build options.
+<LI> Change the program so that all allocation and deallocation goes
+through the garbage collector.
+<LI> Arrange to call <TT>GC_gcollect</tt> at appropriate points to check
+for leaks.
+(For sufficiently long running programs, this will happen implicitly,
+but probably not with sufficient frequency.)
+</ol>
+The second step can usually be accomplished with the
+<TT>-DREDIRECT_MALLOC=GC_malloc</tt> option when the collector is built,
+or by defining <TT>malloc</tt>, <TT>calloc</tt>,
+<TT>realloc</tt> and <TT>free</tt>
+to call the corresponding garbage collector functions.
+But this, by itself, will not yield very informative diagnostics,
+since the collector does not keep track of information about
+how objects were allocated.  The error reports will include
+only object addresses.
+<P>
+For more precise error reports, as much of the program as possible
+should use the all uppercase variants of these functions, after
+defining <TT>GC_DEBUG</tt>, and then including <TT>gc.h</tt>.
+In this environment <TT>GC_MALLOC</tt> is a macro which causes
+at least the file name and line number at the allocation point to
+be saved as part of the object.  Leak reports will then also include
+this information.
+<P>
+Many collector features (<I>e.g.</i> stubborn objects, finalization,
+and disappearing links) are less useful in this context, and are not
+fully supported.  Their use will usually generate additional bogus
+leak reports, since the collector itself drops some associated objects.
+<P>
+The same is generally true of thread support.  However, as of 6.0alpha4,
+correct leak reports should be generated with linuxthreads.
+<P>
+On a few platforms (currently Solaris/SPARC, Irix, and, with -DSAVE_CALL_CHAIN,
+Linux/X86), <TT>GC_MALLOC</tt>
+also causes some more information about its call stack to be saved
+in the object.  Such information is reproduced in the error
+reports in very non-symbolic form, but it can be very useful with the
+aid of a debugger.
+<H2>An Example</h2>
+The following header file <TT>leak_detector.h</tt> is included in the
+"include" subdirectory of the distribution:
+<PRE>
+#define GC_DEBUG
+#include "gc.h"
+#define malloc(n) GC_MALLOC(n)
+#define calloc(m,n) GC_MALLOC((m)*(n))
+#define free(p) GC_FREE(p)
+#define realloc(p,n) GC_REALLOC((p),(n))
+#define CHECK_LEAKS() GC_gcollect()
+</pre>
+<P>
+Assume the collector has been built with -DFIND_LEAK.  (For very
+new versions of the collector, we could instead add the statement
+<TT>GC_find_leak = 1</tt> as the first statement in <TT>main</tt>.)
+<P>
+The program to be tested for leaks can then look like:
+<PRE>
+#include "leak_detector.h"
+
+main() {
+    int *p[10];
+    int i;
+    /* GC_find_leak = 1; for new collector versions not 	*/
+    /* compiled with -DFIND_LEAK.				*/
+    for (i = 0; i < 10; ++i) {
+	p[i] = malloc(sizeof(int)+i);
+    }
+    for (i = 1; i < 10; ++i) {
+	free(p[i]);
+    }
+    for (i = 0; i < 9; ++i) {
+	p[i] = malloc(sizeof(int)+i);
+    }
+    CHECK_LEAKS();
+}	
+</pre>
+<P>
+On an Intel X86 Linux system this produces on the stderr stream:
+<PRE>
+Leaked composite object at 0x806dff0 (leak_test.c:8, sz=4)
+</pre>
+(On most unmentioned operating systems, the output is similar to this.
+If the collector had been built on Linux/X86 with -DSAVE_CALL_CHAIN,
+the output would be closer to the Solaris example. For this to work,
+the program should not be compiled with -fomit-frame-pointer.)
+<P>
+On Irix it reports
+<PRE>
+Leaked composite object at 0x10040fe0 (leak_test.c:8, sz=4)
+        Caller at allocation:
+                ##PC##= 0x10004910
+</pre>
+and on Solaris the error report is
+<PRE>
+Leaked composite object at 0xef621fc8 (leak_test.c:8, sz=4)
+        Call chain at allocation:
+                args: 4 (0x4), 200656 (0x30FD0)
+                ##PC##= 0x14ADC
+                args: 1 (0x1), -268436012 (0xEFFFFDD4)
+                ##PC##= 0x14A64
+</pre>
+In the latter two cases some additional information is given about
+how malloc was called when the leaked object was allocated.  For
+Solaris, the first line specifies the arguments to <TT>GC_debug_malloc</tt>
+(the actual allocation routine), the second the program counter inside
+main, the third the arguments to <TT>main</tt>, and finally the program
+counter inside the caller to main (i.e. in the C startup code).
+<P>
+In the Irix case, only the address inside the caller to main is given.
+<P>
+In many cases, a debugger is needed to interpret the additional information.
+On systems supporting the "adb" debugger, the <TT>callprocs</tt> script
+can be used to replace program counter values with symbolic names.
+As of version 6.1, the collector tries to generate symbolic names for
+call stacks if it knows how to do so on the platform.  This is true on
+Linux/X86, but not on most other platforms.
+<H2>Simplified leak detection under Linux</h2>
+Since version 6.1, it should be possible to run the collector in leak
+detection mode on a program a.out under Linux/X86 as follows:
+<OL>
+<LI> Ensure that a.out is a single-threaded executable.  This doesn't yet work
+for multithreaded programs.
+<LI> If possible, ensure that the addr2line program is installed in
+/usr/bin.  (It comes with RedHat Linux.)
+<LI> If possible, compile a.out with full debug information.
+This will improve the quality of the leak reports.  With this approach, it is
+no longer necessary to call GC_ routines explicitly, though that can also
+improve the quality of the leak reports.
+<LI> Build the collector and install it in directory <I>foo</i> as follows:
+<UL>
+<LI> configure --prefix=<I>foo</i> --enable-full-debug --enable-redirect-malloc
+--disable-threads
+<LI> make
+<LI> make install
+</ul>
+<LI> Set environment variables as follows:
+<UL>
+<LI> LD_PRELOAD=<I>foo</i>/lib/libgc.so
+<LI> GC_FIND_LEAK
+<LI> You may also want to set GC_PRINT_STATS (to confirm that the collector
+is running) and/or GC_LOOP_ON_ABORT (to facilitate debugging from another
+window if something goes wrong).
+</ul>
+<LI> Simply run a.out as you normally would.  Note that if you run anything
+else (<I>e.g.</i> your editor) with those environment variables set,
+it will also be leak tested.  This may or may not be useful and/or
+embarrassing.  It can generate
+mountains of leak reports if the application wasn't designed to avoid leaks,
+<I>e.g.</i> because it's always short-lived.
+</ol>
+This has not yet been thoroughly tested on large applications, but it's known
+to do the right thing on at least some small ones.
+</body>
+</html>
diff --git a/boehm-gc/doc/scale.html b/boehm-gc/doc/scale.html
new file mode 100644
index 00000000000..2e70148dfb7
--- /dev/null
+++ b/boehm-gc/doc/scale.html
@@ -0,0 +1,210 @@
+<HTML>
+<HEAD>
+<TITLE>Garbage collector scalability</TITLE>
+</HEAD>
+<BODY>
+<H1>Garbage collector scalability</h1>
+In its default configuration, the Boehm-Demers-Weiser garbage collector
+is not thread-safe.  It can be made thread-safe for a number of environments
+by building the collector with the appropriate
+<TT>-D</tt><I>XXX</i><TT>_THREADS</tt> compilation
+flag.  This has primarily two effects:
+<OL>
+<LI> It causes the garbage collector to stop all other threads when
+it needs to see a consistent memory state.
+<LI> It causes the collector to acquire a lock around essentially all
+allocation and garbage collection activity.
+</ol>
+Since a single lock is used for all allocation-related activity, only one
+thread can be allocating or collecting at one point.  This inherently
+limits performance of multi-threaded applications on multiprocessors.
+<P>
+On most platforms, the allocator/collector lock is implemented as a
+spin lock with exponential back-off.  Longer wait times are implemented
+by yielding and/or sleeping.  If a collection is in progress, the pure
+spinning stage is skipped.  This has the advantage that uncontested and
+thus most uniprocessor lock acquisitions are very cheap.  It has the
+disadvantage that the application may sleep for small periods of time
+even when there is work to be done.  And threads may be unnecessarily
+woken up for short periods.  Nonetheless, this scheme empirically
+outperforms native queue-based mutual exclusion implementations in most
+cases, sometimes drastically so.
+<H2>Options for enhanced scalability</h2>
+Version 6.0 of the collector adds two facilities to enhance collector
+scalability on multiprocessors.  As of 6.0alpha1, these are supported 
+only under Linux on X86 and IA64 processors, though ports to other
+otherwise supported Pthreads platforms should be straightforward.
+They are intended to be used together.
+<UL>
+<LI>
+Building the collector with <TT>-DPARALLEL_MARK</tt> allows the collector to
+run the mark phase in parallel in multiple threads, and thus on multiple
+processors.  The mark phase typically consumes the large majority of the
+collection time.  Thus this largely parallelizes the garbage collector
+itself, though not the allocation process.  Currently the marking is
+performed by the thread that triggered the collection, together with
+<I>N</i>-1 dedicated
+threads, where <I>N</i> is the number of processors detected by the collector.
+The dedicated threads are created once at initialization time.
+<P>
+A second effect of this flag is to switch to a more concurrent
+implementation of <TT>GC_malloc_many</tt>, so that free lists can be
+built, and memory can be cleared, by more than one thread concurrently.
+<LI>
+Building the collector with -DTHREAD_LOCAL_ALLOC adds support for thread
+local allocation.  It does not, by itself, cause thread local allocation
+to be used.  It simply allows the use of the interface in 
+<TT>gc_local_alloc.h</tt>.
+<P>
+Memory returned from thread-local allocators is completely interchangeable
+with that returned by the standard allocators.  It may be used by other
+threads.  The only difference is that, if the thread allocates enough
+memory of a certain kind, it will build a thread-local free list for
+objects of that kind, and allocate from that.  This greatly reduces
+locking.  The thread-local free lists are refilled using 
+<TT>GC_malloc_many</tt>.
+<P>
+An important side effect of this flag is to replace the default
+spin-then-sleep lock with a spin-then-queue based implementation.
+This <I>reduces performance</i> for the standard allocation functions,
+though it usually improves performance when thread-local allocation is
+used heavily, and thus the number of short-duration lock acquisitions
+is greatly reduced.
+</ul>
+<P>
+The easiest way to switch an application to thread-local allocation is to
+<OL>
+<LI> Define the macro <TT>GC_REDIRECT_TO_LOCAL</tt>,
+and then include the <TT>gc.h</tt>
+header in each client source file.
+<LI> Invoke <TT>GC_thr_init()</tt> before any allocation.
+<LI> Allocate using <TT>GC_MALLOC</tt>, <TT>GC_MALLOC_ATOMIC</tt>,
+and/or <TT>GC_GCJ_MALLOC</tt>.
+</ol>
+<H2>The Parallel Marking Algorithm</h2>
+We use an algorithm similar to
+<A HREF="http://www.yl.is.s.u-tokyo.ac.jp/gc/">that developed by
+Endo, Taura, and Yonezawa</a> at the University of Tokyo.
+However, the data structures and implementation are different,
+and represent a smaller change to the original collector source,
+probably at the expense of extreme scalability.  Some of
+the refinements they suggest, <I>e.g.</i> splitting large
+objects, were also incorporated into our approach.
+<P>
+The global mark stack is transformed into a global work queue.
+Unlike the usual case, it never shrinks during a mark phase.
+The mark threads remove objects from the queue by copying them to a
+local mark stack and changing the global descriptor to zero, indicating
+that there is no more work to be done for this entry.
+This removal
+is done with no synchronization.  Thus it is possible for more than
+one worker to remove the same entry, resulting in some work duplication.
+<P>
+The global work queue grows only if a marker thread decides to
+return some of its local mark stack to the global one.  This
+is done if the global queue appears to be running low, or if
+the local stack is in danger of overflowing.  It does require
+synchronization, but should be relatively rare.
+<P>
+The sequential marking code is reused to process local mark stacks.
+Hence the amount of additional code required for parallel marking
+is minimal.
+<P>
+It should be possible to use generational collection in the presence of the
+parallel collector, by calling <TT>GC_enable_incremental()</tt>.
+This does not result in fully incremental collection, since parallel mark
+phases cannot currently be interrupted, and doing so may be too
+expensive.
+<P>
+Gcj-style mark descriptors do not currently mix with the combination
+of local allocation and incremental collection.  They should work correctly
+with one or the other, but not both.
+<P>
+The number of marker threads is set on startup to the number of
+available processors (or to the value of the <TT>GC_NPROCS</tt>
+environment variable).  If only a single processor is detected,
+parallel marking is disabled.
+<P>
+Note that setting GC_NPROCS to 1 also causes some lock acquisitions inside
+the collector to immediately yield the processor instead of busy waiting
+first.  In the case of a multiprocessor and a client with multiple
+simultaneously runnable threads, this may have disastrous performance
+consequences (e.g. a factor of 10 slowdown). 
+<H2>Performance</h2>
+We conducted some simple experiments with a version of
+<A HREF="gc_bench.html">our GC benchmark</a> that was slightly modified to
+run multiple concurrent client threads in the same address space.
+Each client thread does the same work as the original benchmark, but they share
+a heap.
+This benchmark involves very little work outside of memory allocation.
+This was run with GC 6.0alpha3 on a dual processor Pentium III/500 machine
+under Linux 2.2.12.
+<P>
+Running with a thread-unsafe collector,  the benchmark ran in 9
+seconds.  With the simple thread-safe collector,
+built with <TT>-DLINUX_THREADS</tt>, the execution time
+increased to 10.3 seconds, or 23.5 elapsed seconds with two clients.
+(The times for the <TT>malloc</tt>/<TT>free</tt> version
+with glibc <TT>malloc</tt>
+are 10.51 (standard library, pthreads not linked),
+20.90 (one thread, pthreads linked),
+and 24.55 seconds respectively. The benchmark favors a
+garbage collector, since most objects are small.)
+<P>
+The following table gives execution times for the collector built
+with parallel marking and thread-local allocation support
+(<TT>-DGC_LINUX_THREADS -DPARALLEL_MARK -DTHREAD_LOCAL_ALLOC</tt>).  We tested
+the client using either one or two marker threads, and running
+one or two client threads.  Note that the client uses thread local
+allocation exclusively.  With -DTHREAD_LOCAL_ALLOC the collector
+switches to a locking strategy that is better tuned to less frequent
+lock acquisition.  The standard allocation primitives thus perform
+slightly worse than without -DTHREAD_LOCAL_ALLOC, and should be
+avoided in time-critical code.
+<P>
+(The results using <TT>pthread_mutex_lock</tt>
+directly for allocation locking would have been worse still, at
+least for older versions of linuxthreads.
+With THREAD_LOCAL_ALLOC, we first repeatedly try to acquire the
+lock with pthread_mutex_trylock(), busy-waiting between attempts.
+After a fixed number of attempts, we use pthread_mutex_lock().)
+<P>
+These measurements do not use incremental collection, nor was prefetching
+enabled in the marker.  We used the C version of the benchmark.
+All measurements are in elapsed seconds on an unloaded machine.
+<P>
+<TABLE BORDER ALIGN="CENTER">
+<TR><TH>Number of threads</th><TH>1 marker thread (secs.)</th>
+<TH>2 marker threads (secs.)</th></tr>
+<TR><TD>1 client</td><TD ALIGN="CENTER">10.45</td><TD ALIGN="CENTER">7.85</td></tr>
+<TR><TD>2 clients</td><TD ALIGN="CENTER">19.95</td><TD ALIGN="CENTER">12.3</td></tr>
+</table>
+<P>
+The execution time for the single threaded case is slightly worse than with
+simple locking.  However, even the single-threaded benchmark runs faster than
+even the thread-unsafe version if a second processor is available.
+The execution time for two clients with thread local allocation time is
+only 1.4 times the sequential execution time for a single thread in a
+thread-unsafe environment, even though it involves twice the client work.
+That represents close to a
+factor of 2 improvement over the 2 client case with the old collector.
+The old collector clearly
+still suffered from some contention overhead, in spite of the fact that the
+locking scheme had been fairly well tuned.
+<P>
+Full linear speedup (i.e. the same execution time for 1 client on one
+processor as 2 clients on 2 processors)
+is probably not achievable on this kind of
+hardware even with such a small number of processors,
+since the memory system is
+a major constraint for the garbage collector,
+the processors usually share a single memory bus, and thus
+the aggregate memory bandwidth does not increase in
+proportion to the number of processors. 
+<P>
+These results are likely to be very sensitive to both hardware and OS
+issues.  Preliminary experiments with an older Pentium Pro machine running
+an older kernel were far less encouraging.
+
+</body>
+</html>
diff --git a/boehm-gc/include/gc_allocator.h b/boehm-gc/include/gc_allocator.h
new file mode 100644
index 00000000000..87c85099381
--- /dev/null
+++ b/boehm-gc/include/gc_allocator.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 1996-1997
+ * Silicon Graphics Computer Systems, Inc.
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation.  Silicon Graphics makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ *
+ * Copyright (c) 2002
+ * Hewlett-Packard Company
+ *
+ * Permission to use, copy, modify, distribute and sell this software
+ * and its documentation for any purpose is hereby granted without fee,
+ * provided that the above copyright notice appear in all copies and
+ * that both that copyright notice and this permission notice appear
+ * in supporting documentation.  Hewlett-Packard Company makes no
+ * representations about the suitability of this software for any
+ * purpose.  It is provided "as is" without express or implied warranty.
+ */
+
+/*
+ * This implements standard-conforming allocators that interact with
+ * the garbage collector.  gc_allocator<T> allocates garbage-collectable
+ * objects of type T.  traceable_allocator<T> allocates objects that
+ * are not themselves garbage collected, but are scanned by the
+ * collector for pointers to collectable objects.  traceable_allocator
+ * should be used for explicitly managed STL containers that may
+ * point to collectable objects.
+ *
+ * This code was derived from an earlier version of the GNU C++ standard
+ * library, which itself was derived from the SGI STL implementation.
+ */
+
+#include "gc.h" 	// For size_t
+
+/* First some helpers to allow us to dispatch on whether or not a type
+ * is known to be pointerfree.
+ * These are private, except that the client may invoke the
+ * GC_DECLARE_PTRFREE macro.
+ */
+
+struct GC_true_type {};
+struct GC_false_type {};
+
+template <class GC_tp>
+struct GC_type_traits {
+  GC_false_type GC_is_ptr_free;
+};
+
+# define GC_DECLARE_PTRFREE(T) \
+template<> struct GC_type_traits<T> { GC_true_type GC_is_ptr_free; }
+
+GC_DECLARE_PTRFREE(signed char);
+GC_DECLARE_PTRFREE(unsigned char);
+GC_DECLARE_PTRFREE(signed short);
+GC_DECLARE_PTRFREE(unsigned short);
+GC_DECLARE_PTRFREE(signed int);
+GC_DECLARE_PTRFREE(unsigned int);
+GC_DECLARE_PTRFREE(signed long);
+GC_DECLARE_PTRFREE(unsigned long);
+GC_DECLARE_PTRFREE(float);
+GC_DECLARE_PTRFREE(double);
+/* The client may want to add others.	*/
+
+// In the following GC_Tp is GC_true_type iff we are allocating a
+// pointerfree object.
+template <class GC_Tp>
+inline void * GC_selective_alloc(size_t n, GC_Tp) {
+    return GC_MALLOC(n);
+}
+
+template <>
+inline void * GC_selective_alloc<GC_true_type>(size_t n, GC_true_type) {
+    return GC_MALLOC_ATOMIC(n);
+}
+
+/* Now the public gc_allocator<T> class:
+ */
+template <class GC_Tp>
+class gc_allocator {
+public:
+  typedef size_t     size_type;
+  typedef ptrdiff_t  difference_type;
+  typedef GC_Tp*       pointer;
+  typedef const GC_Tp* const_pointer;
+  typedef GC_Tp&       reference;
+  typedef const GC_Tp& const_reference;
+  typedef GC_Tp        value_type;
+
+  template <class GC_Tp1> struct rebind {
+    typedef gc_allocator<GC_Tp1> other;
+  };
+
+  gc_allocator() throw() {}	// Stateless: all instances are interchangeable.
+# ifndef _MSC_VER
+    // I'm not sure why this is needed here in addition to the following.
+    // The standard specifies it for the standard allocator, but VC++ rejects
+    // it.	-HB
+    gc_allocator(const gc_allocator&) throw() {}
+# endif
+  template <class GC_Tp1> gc_allocator(const gc_allocator<GC_Tp1>&) throw() {}
+  ~gc_allocator() throw() {}
+
+  pointer address(reference GC_x) const { return &GC_x; }
+  const_pointer address(const_reference GC_x) const { return &GC_x; }
+
+  // GC_n is permitted to be 0.  The C++ standard says nothing about what
+  // the return value is when GC_n == 0.
+  GC_Tp* allocate(size_type GC_n, const void* = 0) {
+    GC_type_traits<GC_Tp> traits;	// Selects GC_MALLOC vs. GC_MALLOC_ATOMIC.
+    return static_cast<GC_Tp *>
+	    (GC_selective_alloc(GC_n * sizeof(GC_Tp),
+			        traits.GC_is_ptr_free));
+  }
+
+  // __p is not permitted to be a null pointer.  The element count is unused.
+  void deallocate(pointer __p, size_type /* GC_n */)
+    { GC_FREE(__p); }
+
+  size_type max_size() const throw()
+    { return size_t(-1) / sizeof(GC_Tp); }
+
+  void construct(pointer __p, const GC_Tp& __val) { new(__p) GC_Tp(__val); }
+  void destroy(pointer __p) { __p->~GC_Tp(); }
+};
+
+template<>
+class gc_allocator<void> {
+public:	// Without this the typedefs and rebind are private and unusable.
+  typedef size_t      size_type;
+  typedef ptrdiff_t   difference_type;
+  typedef void*       pointer;
+  typedef const void* const_pointer;
+  typedef void        value_type;
+
+  template <class GC_Tp1> struct rebind {
+    typedef gc_allocator<GC_Tp1> other;
+  };
+};
+
+template <class GC_T1, class GC_T2>
+inline bool operator==(const gc_allocator<GC_T1>&, const gc_allocator<GC_T2>&)
+{
+  return true;
+}
+
+template <class GC_T1, class GC_T2>
+inline bool operator!=(const gc_allocator<GC_T1>&, const gc_allocator<GC_T2>&)
+{
+  return false;
+}
+
+/*
+ * And the public traceable_allocator class.
+ */
+
+// Note that we currently don't specialize the pointer-free case, since a
+// pointer-free traceable container doesn't make that much sense,
+// though it could become an issue due to abstraction boundaries.
+template <class GC_Tp>
+class traceable_allocator {
+public:
+  typedef size_t     size_type;
+  typedef ptrdiff_t  difference_type;
+  typedef GC_Tp*       pointer;
+  typedef const GC_Tp* const_pointer;
+  typedef GC_Tp&       reference;
+  typedef const GC_Tp& const_reference;
+  typedef GC_Tp        value_type;
+
+  template <class GC_Tp1> struct rebind {
+    typedef traceable_allocator<GC_Tp1> other;
+  };
+
+  traceable_allocator() throw() {}
+# ifndef _MSC_VER
+    traceable_allocator(const traceable_allocator&) throw() {}
+# endif
+  template <class GC_Tp1> traceable_allocator
+	  (const traceable_allocator<GC_Tp1>&) throw() {}
+  ~traceable_allocator() throw() {}
+
+  pointer address(reference GC_x) const { return &GC_x; }
+  const_pointer address(const_reference GC_x) const { return &GC_x; }
+
+  // GC_n is permitted to be 0.  The C++ standard says nothing about what
+  // the return value is when GC_n == 0.
+  GC_Tp* allocate(size_type GC_n, const void* = 0) {
+    return static_cast<GC_Tp*>(GC_MALLOC_UNCOLLECTABLE(GC_n * sizeof(GC_Tp)));
+  }
+
+  // __p is not permitted to be a null pointer.  The element count is unused.
+  void deallocate(pointer __p, size_type /* GC_n */)
+    { GC_FREE(__p); }
+
+  size_type max_size() const throw()
+    { return size_t(-1) / sizeof(GC_Tp); }
+
+  void construct(pointer __p, const GC_Tp& __val) { new(__p) GC_Tp(__val); }
+  void destroy(pointer __p) { __p->~GC_Tp(); }
+};
+
+template<>
+class traceable_allocator<void> {
+public:	// Without this the typedefs and rebind are private and unusable.
+  typedef size_t      size_type;
+  typedef ptrdiff_t   difference_type;
+  typedef void*       pointer;
+  typedef const void* const_pointer;
+  typedef void        value_type;
+
+  template <class GC_Tp1> struct rebind {
+    typedef traceable_allocator<GC_Tp1> other;
+  };
+};
+
+template <class GC_T1, class GC_T2>
+inline bool operator==(const traceable_allocator<GC_T1>&, const traceable_allocator<GC_T2>&)
+{
+  return true;
+}
+
+template <class GC_T1, class GC_T2>
+inline bool operator!=(const traceable_allocator<GC_T1>&, const traceable_allocator<GC_T2>&)
+{
+  return false;
+}
+
diff --git a/boehm-gc/include/gc_config_macros.h b/boehm-gc/include/gc_config_macros.h
new file mode 100644
index 00000000000..0c836d876c8
--- /dev/null
+++ b/boehm-gc/include/gc_config_macros.h
@@ -0,0 +1,147 @@
+/*
+ * This should never be included directly.  It is included only from gc.h.
+ * We separate it only to make gc.h more suitable as documentation.
+ * 
+ * Some tests for old macros.  These violate our namespace rules and will
+ * disappear shortly.  Use the GC_ names.
+ */
+#if defined(SOLARIS_THREADS) || defined(_SOLARIS_THREADS)
+# define GC_SOLARIS_THREADS
+#endif
+#if defined(_SOLARIS_PTHREADS)
+# define GC_SOLARIS_PTHREADS
+#endif
+#if defined(IRIX_THREADS)
+# define GC_IRIX_THREADS
+#endif
+#if defined(DGUX_THREADS)
+# if !defined(GC_DGUX386_THREADS)
+#  define GC_DGUX386_THREADS
+# endif
+#endif
+#if defined(AIX_THREADS)
+# define GC_AIX_THREADS
+#endif
+#if defined(HPUX_THREADS)
+# define GC_HPUX_THREADS
+#endif
+#if defined(OSF1_THREADS)
+# define GC_OSF1_THREADS
+#endif
+#if defined(LINUX_THREADS)
+# define GC_LINUX_THREADS
+#endif
+#if defined(WIN32_THREADS)
+# define GC_WIN32_THREADS
+#endif
+#if defined(USE_LD_WRAP)
+# define GC_USE_LD_WRAP
+#endif
+
+#if !defined(_REENTRANT) && (defined(GC_SOLARIS_THREADS) \
+		             || defined(GC_SOLARIS_PTHREADS) \
+			     || defined(GC_HPUX_THREADS) \
+			     || defined(GC_AIX_THREADS) \
+			     || defined(GC_LINUX_THREADS))
+# define _REENTRANT
+	/* Better late than never.  This fails if system headers that	*/
+	/* depend on this were previously included.			*/
+#endif
+
+#if defined(GC_DGUX386_THREADS) && !defined(_POSIX4A_DRAFT10_SOURCE)
+# define _POSIX4A_DRAFT10_SOURCE 1
+#endif
+
+# if defined(GC_SOLARIS_PTHREADS) || defined(GC_FREEBSD_THREADS) || \
+	defined(GC_IRIX_THREADS) || defined(GC_LINUX_THREADS) || \
+	defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS) || \
+	defined(GC_DGUX386_THREADS) || defined(GC_DARWIN_THREADS) || \
+	defined(GC_AIX_THREADS) || \
+        (defined(GC_WIN32_THREADS) && defined(__CYGWIN32__))
+#   define GC_PTHREADS
+# endif
+
+#if defined(GC_THREADS) && !defined(GC_PTHREADS)
+# if defined(__linux__)
+#   define GC_LINUX_THREADS
+#   define GC_PTHREADS
+# endif
+# if !defined(__linux__) && (defined(_PA_RISC1_1) || defined(_PA_RISC2_0) \
+                             || defined(hppa) || defined(__HPPA))
+#   define GC_HPUX_THREADS
+#   define GC_PTHREADS
+# endif /* Tests __linux__, like the other platform checks in this block. */
+# if !defined(__linux__) && (defined(__alpha) || defined(__alpha__))
+#   define GC_OSF1_THREADS
+#   define GC_PTHREADS
+# endif
+# if defined(__mips) && !defined(__linux__)
+#   define GC_IRIX_THREADS
+#   define GC_PTHREADS
+# endif
+# if defined(__sparc) && !defined(__linux__)
+#   define GC_SOLARIS_PTHREADS
+#   define GC_PTHREADS
+# endif
+# if defined(__APPLE__) && defined(__MACH__) && defined(__ppc__)
+#   define GC_DARWIN_THREADS
+#   define GC_PTHREADS
+# endif
+# if !defined(GC_PTHREADS) && defined(__FreeBSD__)
+#   define GC_FREEBSD_THREADS
+#   define GC_PTHREADS
+# endif
+# if defined(DGUX) && (defined(i386) || defined(__i386__))
+#   define GC_DGUX386_THREADS
+#   define GC_PTHREADS
+# endif
+#endif /* GC_THREADS && !GC_PTHREADS */
+
+#if defined(GC_THREADS) && !defined(GC_PTHREADS) && defined(MSWIN32)
+# define GC_WIN32_THREADS
+#endif
+
+#if defined(GC_SOLARIS_PTHREADS) && !defined(GC_SOLARIS_THREADS)
+#   define GC_SOLARIS_THREADS
+#endif
+
+# define __GC
+# include <stddef.h>
+# ifdef _WIN32_WCE
+/* Yet more kluges for WinCE */
+#   include <stdlib.h>		/* size_t is defined here */
+    typedef long ptrdiff_t;	/* ptrdiff_t is not defined */
+# endif
+
+#if defined(_DLL) && !defined(GC_NOT_DLL) && !defined(GC_DLL)
+# define GC_DLL
+#endif
+
+#if defined(__MINGW32__) && defined(GC_DLL)
+# ifdef GC_BUILD
+#   define GC_API __declspec(dllexport)
+# else
+#   define GC_API __declspec(dllimport)
+# endif
+#endif
+
+#if (defined(__DMC__) || defined(_MSC_VER)) && defined(GC_DLL)
+# ifdef GC_BUILD
+#   define GC_API extern __declspec(dllexport)
+# else
+#   define GC_API __declspec(dllimport)
+# endif
+#endif
+
+#if defined(__WATCOMC__) && defined(GC_DLL)
+# ifdef GC_BUILD
+#   define GC_API extern __declspec(dllexport)
+# else
+#   define GC_API extern __declspec(dllimport)
+# endif
+#endif
+
+#ifndef GC_API
+#define GC_API extern
+#endif
+
diff --git a/boehm-gc/include/private/darwin_semaphore.h b/boehm-gc/include/private/darwin_semaphore.h
new file mode 100644
index 00000000000..0f43982d5c1
--- /dev/null
+++ b/boehm-gc/include/private/darwin_semaphore.h
@@ -0,0 +1,68 @@
+#ifndef GC_DARWIN_SEMAPHORE_H
+#define GC_DARWIN_SEMAPHORE_H
+
+#if !defined(GC_DARWIN_THREADS)
+#error darwin_semaphore.h included with GC_DARWIN_THREADS not defined
+#endif
+
+/*
+   This is a very simple semaphore implementation for darwin. It
+   is implemented in terms of pthreads calls so it isn't async signal
+   safe. This isn't a problem because signals aren't used to
+   suspend threads on darwin.
+*/
+   
+typedef struct {
+    pthread_mutex_t mutex;	/* Held by sem_post/sem_wait while touching value. */
+    pthread_cond_t cond;	/* Signalled by sem_post; sem_wait waits on it.	*/
+    int value;			/* Semaphore count; sem_wait blocks while it is 0. */
+} sem_t;
+
+static int sem_init(sem_t *sem, int pshared, int value) { /* 0 on success, -1 on failure */
+    int ret;
+    if(pshared)
+        GC_abort("sem_init with pshared set");
+    sem->value = value;
+
+    ret = pthread_mutex_init(&sem->mutex,NULL);
+    if(ret != 0) return -1;	/* pthread calls return an error number, never < 0 */
+    ret = pthread_cond_init(&sem->cond,NULL);
+    if(ret != 0) { pthread_mutex_destroy(&sem->mutex); return -1; } /* don't leak the mutex */
+    return 0;
+}
+
+static int sem_post(sem_t *sem) {	/* Increment the count and wake one waiter. */
+    if(pthread_mutex_lock(&sem->mutex) != 0)	/* nonzero error number on failure */
+        return -1;
+    sem->value++;
+    if(pthread_cond_signal(&sem->cond) != 0) {
+        pthread_mutex_unlock(&sem->mutex);
+        return -1;
+    }
+    if(pthread_mutex_unlock(&sem->mutex) != 0)
+        return -1;
+    return 0;
+}
+
+static int sem_wait(sem_t *sem) {	/* Block until the count is positive, then take one. */
+    if(pthread_mutex_lock(&sem->mutex) != 0)	/* nonzero error number on failure */
+        return -1;
+    while(sem->value == 0) {	/* loop: re-check the count after every wakeup */
+        pthread_cond_wait(&sem->cond,&sem->mutex);
+    }
+    sem->value--;
+    if(pthread_mutex_unlock(&sem->mutex) != 0)
+        return -1;
+    return 0;
+}
+
+static int sem_destroy(sem_t *sem) {	/* 0 on success, -1 on failure */
+    int ret;
+    ret = pthread_cond_destroy(&sem->cond);
+    if(ret != 0) return -1;	/* pthread calls return an error number, never < 0 */
+    ret = pthread_mutex_destroy(&sem->mutex);
+    if(ret != 0) return -1;
+    return 0;
+}
+
+#endif
diff --git a/boehm-gc/include/private/darwin_stop_world.h b/boehm-gc/include/private/darwin_stop_world.h
new file mode 100644
index 00000000000..9924297ec77
--- /dev/null
+++ b/boehm-gc/include/private/darwin_stop_world.h
@@ -0,0 +1,15 @@
+#ifndef GC_DARWIN_STOP_WORLD_H
+#define GC_DARWIN_STOP_WORLD_H
+
+#if !defined(GC_DARWIN_THREADS)
+#error darwin_stop_world.h included without GC_DARWIN_THREADS defined
+#endif
+
+#include <mach/mach.h>
+#include <mach/thread_act.h>
+
+struct thread_stop_info {
+    mach_port_t mach_thread;
+};
+
+#endif
diff --git a/boehm-gc/include/private/pthread_stop_world.h b/boehm-gc/include/private/pthread_stop_world.h
new file mode 100644
index 00000000000..054c7a0eacd
--- /dev/null
+++ b/boehm-gc/include/private/pthread_stop_world.h
@@ -0,0 +1,12 @@
+#ifndef GC_PTHREAD_STOP_WORLD_H
+#define GC_PTHREAD_STOP_WORLD_H
+
+struct thread_stop_info {
+    int	signal;
+    word last_stop_count;	/* GC_last_stop_count value when thread	*/
+    				/* last successfully handled a suspend	*/
+    				/* signal.				*/
+    ptr_t stack_ptr;  		/* Valid only when stopped.      	*/
+};
+    
+#endif
diff --git a/boehm-gc/include/private/pthread_support.h b/boehm-gc/include/private/pthread_support.h
new file mode 100644
index 00000000000..0ef917e7ef0
--- /dev/null
+++ b/boehm-gc/include/private/pthread_support.h
@@ -0,0 +1,97 @@
+#ifndef GC_PTHREAD_SUPPORT_H
+#define GC_PTHREAD_SUPPORT_H
+
+# include "private/gc_priv.h"
+
+# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
+     && !defined(GC_IRIX_THREADS) && !defined(GC_WIN32_THREADS)
+     
+#if defined(GC_DARWIN_THREADS)
+# include "private/darwin_stop_world.h"
+#else
+# include "private/pthread_stop_world.h"
+#endif
+
+/* We use the allocation lock to protect thread-related data structures. */
+
+/* The set of all known threads.  We intercept thread creation and 	*/
+/* joins.								*/
+/* Protected by allocation/GC lock.					*/
+/* Some of this should be declared volatile, but that's inconsistent	*/
+/* with some library routine declarations.  		 		*/
+typedef struct GC_Thread_Rep {
+    struct GC_Thread_Rep * next;  /* More recently allocated threads	*/
+				  /* with a given pthread id come 	*/
+				  /* first.  (All but the first are	*/
+				  /* guaranteed to be dead, but we may  */
+				  /* not yet have registered the join.) */
+    pthread_t id;
+    /* Extra bookkeeping information the stopping code uses */
+    struct thread_stop_info stop_info;
+    
+    short flags;
+#	define FINISHED 1   	/* Thread has exited.	*/
+#	define DETACHED 2	/* Thread is intended to be detached.	*/
+#	define MAIN_THREAD 4	/* True for the original thread only.	*/
+    short thread_blocked;	/* Protected by GC lock.		*/
+    				/* Treated as a boolean value.  If set,	*/
+    				/* thread will acquire GC lock before	*/
+    				/* doing any pointer manipulations, and	*/
+    				/* has set its sp value.  Thus it does	*/
+    				/* not need to be sent a signal to stop	*/
+    				/* it.					*/
+    ptr_t stack_end;		/* Cold end of the stack.		*/
+#   ifdef IA64
+	ptr_t backing_store_end;
+	ptr_t backing_store_ptr;
+#   endif
+    void * status;		/* The value returned from the thread.  */
+    				/* Used only to avoid premature 	*/
+				/* reclamation of any data it might 	*/
+				/* reference.				*/
+#   ifdef THREAD_LOCAL_ALLOC
+#	if CPP_WORDSZ == 64 && defined(ALIGN_DOUBLE)
+#	    define GRANULARITY 16
+#	    define NFREELISTS 49
+#	else
+#	    define GRANULARITY 8
+#	    define NFREELISTS 65
+#	endif
+	/* The ith free list corresponds to size i*GRANULARITY */
+#	define INDEX_FROM_BYTES(n) ((ADD_SLOP(n) + GRANULARITY - 1)/GRANULARITY)
+#	define BYTES_FROM_INDEX(i) ((i) * GRANULARITY - EXTRA_BYTES)
+#	define SMALL_ENOUGH(bytes) (ADD_SLOP(bytes) <= \
+				    (NFREELISTS-1)*GRANULARITY)
+	ptr_t ptrfree_freelists[NFREELISTS];
+	ptr_t normal_freelists[NFREELISTS];
+#	ifdef GC_GCJ_SUPPORT
+	  ptr_t gcj_freelists[NFREELISTS];
+#	endif
+		/* Free lists contain either a pointer or a small count */
+		/* reflecting the number of granules allocated at that	*/
+		/* size.						*/
+		/* 0 ==> thread-local allocation in use, free list	*/
+		/*       empty.						*/
+		/* > 0, <= DIRECT_GRANULES ==> Using global allocation,	*/
+		/*       too few objects of this size have been		*/
+		/* 	 allocated by this thread.			*/
+		/* >= HBLKSIZE  => pointer to nonempty free list.	*/
+		/* > DIRECT_GRANULES, < HBLKSIZE ==> transition to	*/
+		/*    local alloc, equivalent to 0.			*/
+#	define DIRECT_GRANULES (HBLKSIZE/GRANULARITY)
+		/* Don't use local free lists for up to this much 	*/
+		/* allocation.						*/
+#   endif
+} * GC_thread;
+
+# define THREAD_TABLE_SZ 128	/* Must be power of 2	*/
+extern volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+
+extern GC_bool GC_thr_initialized;
+
+GC_thread GC_lookup_thread(pthread_t id);
+
+void GC_stop_init();
+
+#endif /* GC_PTHREADS && !GC_SOLARIS_THREADS.... etc */
+#endif /* GC_PTHREAD_SUPPORT_H */
diff --git a/boehm-gc/missing b/boehm-gc/missing
new file mode 100755
index 00000000000..dd583709f53
--- /dev/null
+++ b/boehm-gc/missing
@@ -0,0 +1,336 @@
+#! /bin/sh
+# Common stub for a few missing GNU programs while installing.
+# Copyright 1996, 1997, 1999, 2000 Free Software Foundation, Inc.
+# Originally by Fran,cois Pinard <pinard@iro.umontreal.ca>, 1996.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+if test $# -eq 0; then
+  echo 1>&2 "Try \`$0 --help' for more information"
+  exit 1
+fi
+
+run=:
+
+# In the cases where this matters, `missing' is being run in the
+# srcdir already.
+if test -f configure.ac; then
+  configure_ac=configure.ac
+else
+  configure_ac=configure.in
+fi
+
+case "$1" in
+--run)
+  # Try to run requested program, and just exit if it succeeds.
+  run=
+  shift
+  "$@" && exit 0
+  ;;
+esac
+
+# If it does not exist, or fails to run (possibly an outdated version),
+# try to emulate it.
+case "$1" in
+
+  -h|--h|--he|--hel|--help)
+    echo "\
+$0 [OPTION]... PROGRAM [ARGUMENT]...
+
+Handle \`PROGRAM [ARGUMENT]...' for when PROGRAM is missing, or return an
+error status if there is no known handling for PROGRAM.
+
+Options:
+  -h, --help      display this help and exit
+  -v, --version   output version information and exit
+  --run           try to run the given command, and emulate it if it fails
+
+Supported PROGRAM values:
+  aclocal      touch file \`aclocal.m4'
+  autoconf     touch file \`configure'
+  autoheader   touch file \`config.h.in'
+  automake     touch all \`Makefile.in' files
+  bison        create \`y.tab.[ch]', if possible, from existing .[ch]
+  flex         create \`lex.yy.c', if possible, from existing .c
+  help2man     touch the output file
+  lex          create \`lex.yy.c', if possible, from existing .c
+  makeinfo     touch the output file
+  tar          try tar, gnutar, gtar, then tar without non-portable flags
+  yacc         create \`y.tab.[ch]', if possible, from existing .[ch]"
+    ;;
+
+  -v|--v|--ve|--ver|--vers|--versi|--versio|--version)
+    echo "missing 0.4 - GNU automake"
+    ;;
+
+  -*)
+    echo 1>&2 "$0: Unknown \`$1' option"
+    echo 1>&2 "Try \`$0 --help' for more information"
+    exit 1
+    ;;
+
+  aclocal*)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified \`acinclude.m4' or \`${configure_ac}'.  You might want
+         to install the \`Automake' and \`Perl' packages.  Grab them from
+         any GNU archive site."
+    touch aclocal.m4
+    ;;
+
+  autoconf)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified \`${configure_ac}'.  You might want to install the
+         \`Autoconf' and \`GNU m4' packages.  Grab them from any GNU
+         archive site."
+    touch configure
+    ;;
+
+  autoheader)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified \`acconfig.h' or \`${configure_ac}'.  You might want
+         to install the \`Autoconf' and \`GNU m4' packages.  Grab them
+         from any GNU archive site."
+    files=`sed -n 's/^[ ]*A[CM]_CONFIG_HEADER(\([^)]*\)).*/\1/p' ${configure_ac}`
+    test -z "$files" && files="config.h"
+    touch_files=
+    for f in $files; do
+      case "$f" in
+      *:*) touch_files="$touch_files "`echo "$f" |
+				       sed -e 's/^[^:]*://' -e 's/:.*//'`;;
+      *) touch_files="$touch_files $f.in";;
+      esac
+    done
+    touch $touch_files
+    ;;
+
+  automake*)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified \`Makefile.am', \`acinclude.m4' or \`${configure_ac}'.
+         You might want to install the \`Automake' and \`Perl' packages.
+         Grab them from any GNU archive site."
+    find . -type f -name Makefile.am -print |
+	   sed 's/\.am$/.in/' |
+	   while read f; do touch "$f"; done
+    ;;
+
+  autom4te)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is needed, and you do not seem to have it handy on your
+         system.  You might have modified some files without having the
+         proper tools for further handling them.
+         You can get \`$1' as part of \`Autoconf' from any GNU
+         archive site."
+
+    file=`echo "$*" | sed -n 's/.*--output[ =]*\([^ ]*\).*/\1/p'`
+    test -z "$file" && file=`echo "$*" | sed -n 's/.*-o[ ]*\([^ ]*\).*/\1/p'`
+    if test -f "$file"; then
+	touch $file
+    else
+	test -z "$file" || exec >$file
+	echo "#! /bin/sh"
+	echo "# Created by GNU Automake missing as a replacement of"
+	echo "#  $ $@"
+	echo "exit 0"
+	chmod +x $file
+	exit 1
+    fi
+    ;;
+
+  bison|yacc)
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified a \`.y' file.  You may need the \`Bison' package
+         in order for those modifications to take effect.  You can get
+         \`Bison' from any GNU archive site."
+    rm -f y.tab.c y.tab.h
+    if [ $# -ne 1 ]; then
+        eval LASTARG="\${$#}"
+	case "$LASTARG" in
+	*.y)
+	    SRCFILE=`echo "$LASTARG" | sed 's/y$/c/'`
+	    if [ -f "$SRCFILE" ]; then
+	         cp "$SRCFILE" y.tab.c
+	    fi
+	    SRCFILE=`echo "$LASTARG" | sed 's/y$/h/'`
+	    if [ -f "$SRCFILE" ]; then
+	         cp "$SRCFILE" y.tab.h
+	    fi
+	  ;;
+	esac
+    fi
+    if [ ! -f y.tab.h ]; then
+	echo >y.tab.h
+    fi
+    if [ ! -f y.tab.c ]; then
+	echo 'main() { return 0; }' >y.tab.c
+    fi
+    ;;
+
+  lex|flex)
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified a \`.l' file.  You may need the \`Flex' package
+         in order for those modifications to take effect.  You can get
+         \`Flex' from any GNU archive site."
+    rm -f lex.yy.c
+    if [ $# -ne 1 ]; then
+        eval LASTARG="\${$#}"
+	case "$LASTARG" in
+	*.l)
+	    SRCFILE=`echo "$LASTARG" | sed 's/l$/c/'`
+	    if [ -f "$SRCFILE" ]; then
+	         cp "$SRCFILE" lex.yy.c
+	    fi
+	  ;;
+	esac
+    fi
+    if [ ! -f lex.yy.c ]; then
+	echo 'main() { return 0; }' >lex.yy.c
+    fi
+    ;;
+
+  help2man)
+    if test -z "$run" && ($1 --version) > /dev/null 2>&1; then
+       # We have it, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+	 you modified a dependency of a manual page.  You may need the
+	 \`Help2man' package in order for those modifications to take
+	 effect.  You can get \`Help2man' from any GNU archive site."
+
+    file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
+    if test -z "$file"; then
+	file=`echo "$*" | sed -n 's/.*--output=\([^ ]*\).*/\1/p'`
+    fi
+    if [ -f "$file" ]; then
+	touch $file
+    else
+	test -z "$file" || exec >$file
+	echo ".ab help2man is required to generate this page"
+	exit 1
+    fi
+    ;;
+
+  makeinfo)
+    if test -z "$run" && (makeinfo --version) > /dev/null 2>&1; then
+       # We have makeinfo, but it failed.
+       exit 1
+    fi
+
+    echo 1>&2 "\
+WARNING: \`$1' is missing on your system.  You should only need it if
+         you modified a \`.texi' or \`.texinfo' file, or any other file
+         indirectly affecting the aspect of the manual.  The spurious
+         call might also be the consequence of using a buggy \`make' (AIX,
+         DU, IRIX).  You might want to install the \`Texinfo' package or
+         the \`GNU make' package.  Grab either from any GNU archive site."
+    file=`echo "$*" | sed -n 's/.*-o \([^ ]*\).*/\1/p'`
+    if test -z "$file"; then
+      file=`echo "$*" | sed 's/.* \([^ ]*\) *$/\1/'`
+      file=`sed -n '/^@setfilename/ { s/.* \([^ ]*\) *$/\1/; p; q; }' $file`
+    fi
+    touch $file
+    ;;
+
+  tar)
+    shift
+    if test -n "$run"; then
+      echo 1>&2 "ERROR: \`tar' requires --run"
+      exit 1
+    fi
+
+    # We have already tried tar in the generic part.
+    # Look for gnutar/gtar before invocation to avoid ugly error
+    # messages.
+    if (gnutar --version > /dev/null 2>&1); then
+       gnutar ${1+"$@"} && exit 0
+    fi
+    if (gtar --version > /dev/null 2>&1); then
+       gtar ${1+"$@"} && exit 0
+    fi
+    firstarg="$1"
+    if shift; then
+	case "$firstarg" in
+	*o*)
+	    firstarg=`echo "$firstarg" | sed s/o//`
+	    tar "$firstarg" ${1+"$@"} && exit 0
+	    ;;
+	esac
+	case "$firstarg" in
+	*h*)
+	    firstarg=`echo "$firstarg" | sed s/h//`
+	    tar "$firstarg" ${1+"$@"} && exit 0
+	    ;;
+	esac
+    fi
+
+    echo 1>&2 "\
+WARNING: I can't seem to be able to run \`tar' with the given arguments.
+         You may want to install GNU tar or Free paxutils, or check the
+         command line arguments."
+    exit 1
+    ;;
+
+  *)
+    echo 1>&2 "\
+WARNING: \`$1' is needed, and you do not seem to have it handy on your
+         system.  You might have modified some files without having the
+         proper tools for further handling them.  Check the \`README' file,
+         it often tells you about the needed prerequirements for installing
+         this package.  You may also peek at any GNU archive site, in case
+         some other package would contain this missing \`$1' program."
+    exit 1
+    ;;
+esac
+
+exit 0
diff --git a/boehm-gc/powerpc_darwin_mach_dep.s b/boehm-gc/powerpc_darwin_mach_dep.s
new file mode 100644
index 00000000000..92f2c93ca8d
--- /dev/null
+++ b/boehm-gc/powerpc_darwin_mach_dep.s
@@ -0,0 +1,84 @@
+
+; GC_push_regs function. Under some optimization levels GCC will clobber
+; some of the non-volatile registers before we get a chance to save them
+; therefore, this can't be inline asm.
+
+.text
+	.align 2
+	.globl _GC_push_regs
+_GC_push_regs:
+    
+    ; Prolog
+	mflr r0
+	stw r0,8(r1)
+	stwu r1,-80(r1)
+
+	; Push r13-r31
+	mr r3,r13
+	bl L_GC_push_one$stub
+	mr r3,r14
+	bl L_GC_push_one$stub
+	mr r3,r15
+	bl L_GC_push_one$stub
+	mr r3,r16
+	bl L_GC_push_one$stub
+	mr r3,r17
+	bl L_GC_push_one$stub
+	mr r3,r18
+	bl L_GC_push_one$stub
+	mr r3,r19
+	bl L_GC_push_one$stub
+	mr r3,r20
+	bl L_GC_push_one$stub
+	mr r3,r21
+	bl L_GC_push_one$stub
+	mr r3,r22
+	bl L_GC_push_one$stub
+	mr r3,r23
+	bl L_GC_push_one$stub
+	mr r3,r24
+	bl L_GC_push_one$stub
+	mr r3,r25
+	bl L_GC_push_one$stub
+	mr r3,r26
+	bl L_GC_push_one$stub
+	mr r3,r27
+	bl L_GC_push_one$stub
+	mr r3,r28
+	bl L_GC_push_one$stub
+	mr r3,r29
+	bl L_GC_push_one$stub
+	mr r3,r30
+	bl L_GC_push_one$stub
+	mr r3,r31
+	bl L_GC_push_one$stub
+
+    ; Epilog: reload the saved LR and pop the 80-byte stack frame
+    lwz r0,88(r1)
+    addi r1,r1,80
+	mtlr r0
+    	
+	; Return
+	blr
+
+; PIC stuff, generated by GCC
+
+.data
+.picsymbol_stub
+L_GC_push_one$stub:
+	.indirect_symbol _GC_push_one
+	mflr r0
+	bcl 20,31,L0$_GC_push_one
+L0$_GC_push_one:
+	mflr r11
+	addis r11,r11,ha16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)
+	mtlr r0
+	lwz r12,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)(r11)
+	mtctr r12
+	addi r11,r11,lo16(L_GC_push_one$lazy_ptr-L0$_GC_push_one)
+	bctr
+.data
+.lazy_symbol_pointer
+L_GC_push_one$lazy_ptr:
+	.indirect_symbol _GC_push_one
+	.long dyld_stub_binding_helper
diff --git a/boehm-gc/pthread_stop_world.c b/boehm-gc/pthread_stop_world.c
new file mode 100644
index 00000000000..5dfd26d319a
--- /dev/null
+++ b/boehm-gc/pthread_stop_world.c
@@ -0,0 +1,445 @@
+#include "private/pthread_support.h"
+
+#if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
+     && !defined(GC_IRIX_THREADS) && !defined(GC_WIN32_THREADS) \
+     && !defined(GC_DARWIN_THREADS) && !defined(GC_AIX_THREADS)
+
+#include <signal.h>
+#include <semaphore.h>
+#include <errno.h>
+#include <unistd.h>
+
+#if DEBUG_THREADS
+
+#ifndef NSIG
+# if defined(MAXSIG)
+#  define NSIG (MAXSIG+1)
+# elif defined(_NSIG)
+#  define NSIG _NSIG
+# elif defined(__SIGRTMAX)
+#  define NSIG (__SIGRTMAX+1)
+# else
+  --> please fix it
+# endif
+#endif
+
+/* Debugging aid: print the number of every signal that is currently	*/
+/* blocked in the calling thread's signal mask.				*/
+void GC_print_sig_mask()
+{
+    sigset_t cur_mask;
+    int signo = 1;
+
+    /* With a NULL new set the mask is only queried, never changed.	*/
+    if (0 != pthread_sigmask(SIG_BLOCK, NULL, &cur_mask)) {
+	ABORT("pthread_sigmask");
+    }
+    GC_printf0("Blocked: ");
+    while (signo < NSIG) {
+	if (sigismember(&cur_mask, signo)) {
+	    GC_printf1("%ld ",(long) signo);
+	}
+	++signo;
+    }
+    GC_printf0("\n");
+}
+
+#endif
+
+word GC_stop_count;	/* Incremented at the beginning of GC_stop_world. */
+
+#ifdef GC_OSF1_THREADS
+  GC_bool GC_retry_signals = TRUE;
+#else
+  GC_bool GC_retry_signals = FALSE;
+#endif
+
+/*
+ * We use signals to stop threads during GC.
+ * 
+ * Suspended threads wait in signal handler for SIG_THR_RESTART.
+ * That's more portable than semaphores or condition variables.
+ * (We do use sem_post from a signal handler, but that should be portable.)
+ *
+ * The thread suspension signal SIG_SUSPEND is now defined in gc_priv.h.
+ * Note that we can't just stop a thread; we need it to save its stack
+ * pointer(s) and acknowledge.
+ */
+
+#ifndef SIG_THR_RESTART
+#  if defined(GC_HPUX_THREADS) || defined(GC_OSF1_THREADS)
+#    ifdef _SIGRTMIN
+#      define SIG_THR_RESTART _SIGRTMIN + 5
+#    else
+#      define SIG_THR_RESTART SIGRTMIN + 5
+#    endif
+#  else
+#   define SIG_THR_RESTART SIGXCPU
+#  endif
+#endif
+
+sem_t GC_suspend_ack_sem;
+
+void GC_suspend_handler(int sig)
+{   /* Signal handler for SIG_SUSPEND.  Records the calling thread's
+     * stack pointer in its GC_thread entry, acknowledges the suspension
+     * through GC_suspend_ack_sem, and then waits in sigsuspend() until
+     * GC_restart_handler() has flagged the arrival of SIG_THR_RESTART.
+     * A duplicate signal for the same GC_stop_count returns at once.
+     */
+    int dummy;		/* Its address stands in for the stack pointer.	*/
+    pthread_t my_thread = pthread_self();
+    GC_thread me;
+    sigset_t mask;
+#   ifdef PARALLEL_MARK
+	word my_mark_no = GC_mark_no;
+	/* Marker can't proceed until we acknowledge.  Thus this is	*/
+	/* guaranteed to be the mark_no corresponding to our 		*/
+	/* suspension, i.e. the marker can't have incremented it yet.	*/
+#   endif
+    word my_stop_count = GC_stop_count;
+
+    if (sig != SIG_SUSPEND) ABORT("Bad signal in suspend_handler");
+
+#if DEBUG_THREADS
+    GC_printf1("Suspending 0x%lx\n", my_thread);
+#endif
+
+    me = GC_lookup_thread(my_thread);
+    /* The lookup here is safe, since I'm doing this on behalf  */
+    /* of a thread which holds the allocation lock in order	*/
+    /* to stop the world.  Thus concurrent modification of the	*/
+    /* data structure is impossible.				*/
+    if (me -> stop_info.last_stop_count == my_stop_count) {
+	/* Duplicate signal.  OK if we are retrying.	*/
+	if (!GC_retry_signals) {
+	    WARN("Duplicate suspend signal in thread %lx\n",
+		 pthread_self());
+	}
+	return;
+    }
+#   ifdef SPARC
+	me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+#   else
+	me -> stop_info.stack_ptr = (ptr_t)(&dummy);
+#   endif
+#   ifdef IA64
+	me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack();
+#   endif
+
+    /* Tell the thread that wants to stop the world that this   */
+    /* thread has been stopped.  Note that sem_post() is  	*/
+    /* the only async-signal-safe primitive in LinuxThreads.    */
+    sem_post(&GC_suspend_ack_sem);
+    me -> stop_info.last_stop_count = my_stop_count;	/* Marks this round as handled.	*/
+
+    /* Wait until that thread tells us to restart by sending    */
+    /* this thread a SIG_THR_RESTART signal.			*/
+    /* SIG_THR_RESTART should be masked at this point.  Thus there	*/
+    /* is no race.						*/
+    if (sigfillset(&mask) != 0) ABORT("sigfillset() failed");
+    if (sigdelset(&mask, SIG_THR_RESTART) != 0) ABORT("sigdelset() failed");
+#   ifdef NO_SIGNALS
+      if (sigdelset(&mask, SIGINT) != 0) ABORT("sigdelset() failed");
+      if (sigdelset(&mask, SIGQUIT) != 0) ABORT("sigdelset() failed");
+      if (sigdelset(&mask, SIGTERM) != 0) ABORT("sigdelset() failed");
+      if (sigdelset(&mask, SIGABRT) != 0) ABORT("sigdelset() failed");
+#   endif
+    do {
+	    me->stop_info.signal = 0;	/* Cleared so that only the restart handler can end the loop.	*/
+	    sigsuspend(&mask);             /* Wait for signal */
+    } while (me->stop_info.signal != SIG_THR_RESTART);
+    /* If the RESTART signal gets lost, we can still lose.  That should be  */
+    /* less likely than losing the SUSPEND signal, since we don't do much   */
+    /* between the sem_post and sigsuspend.	   			    */
+    /* We'd need more handshaking to work around that, since we don't want  */
+    /* to accidentally leave a RESTART signal pending, thus causing us to   */
+    /* continue prematurely in a future round.				    */ 
+
+#if DEBUG_THREADS
+    GC_printf1("Continuing 0x%lx\n", my_thread);
+#endif
+}
+
+/* Signal handler for SIG_THR_RESTART.  It records in the calling	*/
+/* thread's stop_info that the restart signal has arrived, which is	*/
+/* what the sigsuspend() loop in GC_suspend_handler() tests for.	*/
+/* Any other signal number is a fatal error.				*/
+void GC_restart_handler(int sig)
+{
+    pthread_t my_thread = pthread_self();
+    GC_thread me;
+
+    if (sig != SIG_THR_RESTART) ABORT("Bad signal in restart_handler");
+
+    /* Let the GC_suspend_handler() know that we got a SIG_THR_RESTART. */
+    /* The lookup here is safe, since I'm doing this on behalf  */
+    /* of a thread which holds the allocation lock in order	*/
+    /* to stop the world.  Thus concurrent modification of the	*/
+    /* data structure is impossible.				*/
+    me = GC_lookup_thread(my_thread);
+    me->stop_info.signal = SIG_THR_RESTART;
+
+    /*
+    ** Note: even if we didn't do anything useful here,
+    ** it would still be necessary to have a signal handler,
+    ** rather than ignoring the signals, otherwise
+    ** the signals will not be delivered at all, and
+    ** will thus not interrupt the sigsuspend() above.
+    */
+
+#if DEBUG_THREADS
+    GC_printf1("In GC_restart_handler for 0x%lx\n", pthread_self());
+#endif
+}
+
+# ifdef IA64
+#   define IF_IA64(x) x
+# else
+#   define IF_IA64(x)
+# endif
+/* We hold allocation lock.  Should do exactly the right thing if the	*/
+/* world is stopped.  Should not fail if it isn't.
+ * Walks every entry of GC_threads that is not FINISHED and pushes its
+ * stack range [lo, hi).  For the calling thread lo is the current
+ * (approximate) stack pointer; for any other thread it is the stack_ptr
+ * saved in stop_info.  hi is the thread's stack_end, or GC_stackbottom
+ * for the MAIN_THREAD.  Aborts if lo is 0.  On IA64 the register
+ * backing store range is pushed as well.
+ */
+void GC_push_all_stacks()
+{
+    int i;
+    GC_thread p;
+    ptr_t lo, hi;
+    /* On IA64, we also need to scan the register backing store. */
+    IF_IA64(ptr_t bs_lo; ptr_t bs_hi;)
+    pthread_t me = pthread_self();
+    
+    if (!GC_thr_initialized) GC_thr_init();
+    #if DEBUG_THREADS
+        GC_printf1("Pushing stacks from thread 0x%lx\n", (unsigned long) me);
+    #endif
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        if (p -> flags & FINISHED) continue;
+        if (pthread_equal(p -> id, me)) {	/* The calling thread: take live values.	*/
+#  	    ifdef SPARC
+	        lo = (ptr_t)GC_save_regs_in_stack();
+#  	    else
+ 	        lo = GC_approx_sp();
+#           endif
+	    IF_IA64(bs_hi = (ptr_t)GC_save_regs_in_stack();)
+	} else {	/* Another thread: use the values it saved.	*/
+	    lo = p -> stop_info.stack_ptr;
+	    IF_IA64(bs_hi = p -> backing_store_ptr;)
+	}
+        if ((p -> flags & MAIN_THREAD) == 0) {
+	    hi = p -> stack_end;
+	    IF_IA64(bs_lo = p -> backing_store_end);
+        } else {
+            /* The original stack. */
+            hi = GC_stackbottom;
+	    IF_IA64(bs_lo = BACKING_STORE_BASE;)
+        }
+        #if DEBUG_THREADS
+            GC_printf3("Stack for thread 0x%lx = [%lx,%lx)\n",
+    	        (unsigned long) p -> id,
+		(unsigned long) lo, (unsigned long) hi);
+        #endif
+	if (0 == lo) ABORT("GC_push_all_stacks: sp not set!\n");
+#       ifdef STACK_GROWS_UP
+	  /* We got them backwards! */
+          GC_push_all_stack(hi, lo);
+#       else
+          GC_push_all_stack(lo, hi);
+#	endif
+#	ifdef IA64
+          if (pthread_equal(p -> id, me)) {
+	    GC_push_all_eager(bs_lo, bs_hi);
+	  } else {
+	    GC_push_all_stack(bs_lo, bs_hi);
+	  }
+#	endif
+      }
+    }
+}
+
+/* There seems to be a very rare thread stopping problem.  To help us  */
+/* debug that, we save the ids of the stopping thread. */
+pthread_t GC_stopping_thread;
+int GC_stopping_pid;
+
+/* We hold the allocation lock.  Send SIG_SUSPEND to every thread that	*/
+/* might still be running: every thread other than the caller that is	*/
+/* not FINISHED, is not thread_blocked, and has not yet recorded the	*/
+/* current GC_stop_count.  Return the number of suspend signals that	*/
+/* were sent; threads for which pthread_kill reports ESRCH are not	*/
+/* counted.  Any other pthread_kill failure is fatal.			*/
+int GC_suspend_all()
+{
+    int n_live_threads = 0;
+    int i;
+    GC_thread p;
+    int result;
+    pthread_t my_thread = pthread_self();
+
+    GC_stopping_thread = my_thread;    /* debugging only.      */
+    GC_stopping_pid = getpid();                /* debugging only.      */
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        /* pthread_t is an opaque type, so ids are compared with	*/
+        /* pthread_equal rather than with the != operator.		*/
+        if (pthread_equal(p -> id, my_thread)) continue;
+        if (p -> flags & FINISHED) continue;
+        if (p -> stop_info.last_stop_count == GC_stop_count) continue;
+        if (p -> thread_blocked) /* Will wait */ continue;
+        n_live_threads++;
+	#if DEBUG_THREADS
+	  GC_printf1("Sending suspend signal to 0x%lx\n", p -> id);
+	#endif
+
+        result = pthread_kill(p -> id, SIG_SUSPEND);
+        switch(result) {
+            case ESRCH:
+                /* Not really there anymore.  Possible? */
+                n_live_threads--;
+                break;
+            case 0:
+                break;
+            default:
+                ABORT("pthread_kill failed");
+        }
+      }
+    }
+    return n_live_threads;
+}
+
+/* Caller holds allocation lock.					*/
+/* Stop every other running thread: bump GC_stop_count, send the	*/
+/* suspend signals via GC_suspend_all(), and wait on			*/
+/* GC_suspend_ack_sem once per signalled thread.  When			*/
+/* GC_retry_signals is set, signals are resent to threads that have	*/
+/* not acknowledged within RETRY_INTERVAL microseconds.		*/
+void GC_stop_world()
+{
+    int i;
+    int n_live_threads;
+    int code;
+
+    #if DEBUG_THREADS
+    GC_printf1("Stopping the world from 0x%lx\n", pthread_self());
+    #endif
+
+    /* Make sure all free list construction has stopped before we start. */
+    /* No new construction can start, since free list construction is	*/
+    /* required to acquire and release the GC lock before it starts,	*/
+    /* and we have the lock.						*/
+#   ifdef PARALLEL_MARK
+      GC_acquire_mark_lock();
+      GC_ASSERT(GC_fl_builder_count == 0);
+      /* We should have previously waited for it to become zero. */
+#   endif /* PARALLEL_MARK */
+    ++GC_stop_count;
+    n_live_threads = GC_suspend_all();
+
+      if (GC_retry_signals) {
+	  unsigned long wait_usecs = 0;  /* Total wait since retry.	*/
+#	  define WAIT_UNIT 3000
+#	  define RETRY_INTERVAL 100000
+	  for (;;) {
+	      int ack_count;
+
+	      sem_getvalue(&GC_suspend_ack_sem, &ack_count);
+	      if (ack_count == n_live_threads) break;
+	      if (wait_usecs > RETRY_INTERVAL) {
+		  int newly_sent = GC_suspend_all();
+
+#                 ifdef CONDPRINT
+		    if (GC_print_stats) {
+		      GC_printf1("Resent %ld signals after timeout\n",
+				 newly_sent);
+		    }
+#                 endif
+		  sem_getvalue(&GC_suspend_ack_sem, &ack_count);
+		  if (newly_sent < n_live_threads - ack_count) {
+		      WARN("Lost some threads during GC_stop_world?!\n",0);
+		      n_live_threads = ack_count + newly_sent;
+		  }
+		  wait_usecs = 0;
+	      }
+	      usleep(WAIT_UNIT);
+	      wait_usecs += WAIT_UNIT;
+	  }
+      }
+    for (i = 0; i < n_live_threads; i++) {
+	  while (0 != (code = sem_wait(&GC_suspend_ack_sem))) {
+	      /* sem_wait fails with EINTR when a signal handler	*/
+	      /* interrupts it.  That is not an error: the		*/
+	      /* acknowledgement is still outstanding, so wait again.	*/
+	      if (errno == EINTR) continue;
+	      GC_err_printf1("Sem_wait returned %ld\n", (unsigned long)code);
+	      ABORT("sem_wait for handler failed");
+	  }
+    }
+#   ifdef PARALLEL_MARK
+      GC_release_mark_lock();
+#   endif
+    #if DEBUG_THREADS
+      GC_printf1("World stopped from 0x%lx\n", pthread_self());
+    #endif
+    GC_stopping_thread = 0;  /* debugging only */
+}
+
+/* Caller holds allocation lock, and has held it continuously since	*/
+/* the world stopped.							*/
+/* Sends SIG_THR_RESTART to every thread other than the caller that is	*/
+/* neither FINISHED nor thread_blocked.  A pthread_kill result of	*/
+/* ESRCH is ignored; any other failure is fatal.			*/
+void GC_start_world()
+{
+    pthread_t my_thread = pthread_self();
+    int i;
+    GC_thread p;
+    int result;
+
+#   if DEBUG_THREADS
+      GC_printf0("World starting\n");
+#   endif
+
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+        /* pthread_t is an opaque type, so ids are compared with	*/
+        /* pthread_equal rather than with the != operator.		*/
+        if (pthread_equal(p -> id, my_thread)) continue;
+        if (p -> flags & FINISHED) continue;
+        if (p -> thread_blocked) continue;
+	#if DEBUG_THREADS
+	  GC_printf1("Sending restart signal to 0x%lx\n", p -> id);
+	#endif
+
+        result = pthread_kill(p -> id, SIG_THR_RESTART);
+        switch(result) {
+            case ESRCH:
+                /* Not really there anymore.  Possible? */
+                break;
+            case 0:
+                break;
+            default:
+                ABORT("pthread_kill failed");
+        }
+      }
+    }
+    #if DEBUG_THREADS
+      GC_printf0("World started\n");
+    #endif
+}
+
+/* One-time setup for stopping the world: initialize the		*/
+/* acknowledgement semaphore, install GC_suspend_handler for		*/
+/* SIG_SUSPEND and GC_restart_handler for SIG_THR_RESTART, and let the	*/
+/* GC_RETRY_SIGNALS / GC_NO_RETRY_SIGNALS environment variables	*/
+/* override the compiled-in GC_retry_signals default.			*/
+void GC_stop_init() {
+    struct sigaction act;
+
+    if (sem_init(&GC_suspend_ack_sem, 0, 0) != 0)
+        ABORT("sem_init failed");
+
+    act.sa_flags = SA_RESTART;
+    /* Block all signals while either handler is running ...		*/
+    if (sigfillset(&act.sa_mask) != 0) {
+    	ABORT("sigfillset() failed");
+    }
+#   ifdef NO_SIGNALS
+      /* ... except these four.  The comparison with 0 belongs outside	*/
+      /* each call, so that the signal number itself is what gets	*/
+      /* removed from the mask.						*/
+      if (sigdelset(&act.sa_mask, SIGINT) != 0
+	  || sigdelset(&act.sa_mask, SIGQUIT) != 0
+	  || sigdelset(&act.sa_mask, SIGABRT) != 0
+	  || sigdelset(&act.sa_mask, SIGTERM) != 0) {
+        ABORT("sigdelset() failed");
+      }
+#   endif
+
+    /* SIG_THR_RESTART is unmasked by the handler when necessary. 	*/
+    act.sa_handler = GC_suspend_handler;
+    if (sigaction(SIG_SUSPEND, &act, NULL) != 0) {
+    	ABORT("Cannot set SIG_SUSPEND handler");
+    }
+
+    act.sa_handler = GC_restart_handler;
+    if (sigaction(SIG_THR_RESTART, &act, NULL) != 0) {
+    	ABORT("Cannot set SIG_THR_RESTART handler");
+    }
+
+    /* Check for GC_RETRY_SIGNALS.  GC_NO_RETRY_SIGNALS is tested last	*/
+    /* and therefore wins if both variables are set.			*/
+      if (0 != GETENV("GC_RETRY_SIGNALS")) {
+	  GC_retry_signals = TRUE;
+      }
+      if (0 != GETENV("GC_NO_RETRY_SIGNALS")) {
+	  GC_retry_signals = FALSE;
+      }
+#     ifdef CONDPRINT
+          if (GC_print_stats && GC_retry_signals) {
+              GC_printf0("Will retry suspend signal if necessary.\n");
+	  }
+#     endif
+}
+
+#endif
diff --git a/boehm-gc/pthread_support.c b/boehm-gc/pthread_support.c
new file mode 100644
index 00000000000..b302817bfdf
--- /dev/null
+++ b/boehm-gc/pthread_support.c
@@ -0,0 +1,1570 @@
+/* 
+ * Copyright (c) 1994 by Xerox Corporation.  All rights reserved.
+ * Copyright (c) 1996 by Silicon Graphics.  All rights reserved.
+ * Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
+ * Copyright (c) 2000-2001 by Hewlett-Packard Company.  All rights reserved.
+ *
+ * THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+ * OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+ *
+ * Permission is hereby granted to use or copy this program
+ * for any purpose,  provided the above notices are retained on all copies.
+ * Permission to modify the code and to distribute modified code is granted,
+ * provided the above notices are retained, and a notice that the code was
+ * modified is included with the above copyright notice.
+ */
+/*
+ * Support code for LinuxThreads, the clone()-based kernel
+ * thread package for Linux which is included in libc6.
+ *
+ * This code relies on implementation details of LinuxThreads,
+ * (i.e. properties not guaranteed by the Pthread standard),
+ * though this version now does less of that than the other Pthreads
+ * support code.
+ *
+ * Note that there is a lot of code duplication between linux_threads.c
+ * and thread support for some of the other Posix platforms; any changes
+ * made here may need to be reflected there too.
+ */
+ /* DG/UX ix86 support <takis@xfree86.org> */
+/*
+ * Linux_threads.c now also includes some code to support HPUX and
+ * OSF1 (Compaq Tru64 Unix, really).  The OSF1 support is based on Eric Benson's
+ * patch.
+ *
+ * Eric also suggested an alternate basis for a lock implementation in
+ * his code:
+ * + #elif defined(OSF1)
+ * +    unsigned long GC_allocate_lock = 0;
+ * +    msemaphore GC_allocate_semaphore;
+ * + #  define GC_TRY_LOCK() \
+ * +    ((msem_lock(&GC_allocate_semaphore, MSEM_IF_NOWAIT) == 0) \
+ * +     ? (GC_allocate_lock = 1) \
+ * +     : 0)
+ * + #  define GC_LOCK_TAKEN GC_allocate_lock
+ */
+
+/*#define DEBUG_THREADS 1*/
+/*#define GC_ASSERTIONS*/
+
+# include "private/pthread_support.h"
+
+# if defined(GC_PTHREADS) && !defined(GC_SOLARIS_THREADS) \
+     && !defined(GC_IRIX_THREADS) && !defined(GC_WIN32_THREADS) \
+     && !defined(GC_AIX_THREADS)
+
+# if defined(GC_HPUX_THREADS) && !defined(USE_PTHREAD_SPECIFIC) \
+     && !defined(USE_HPUX_TLS)
+#   define USE_HPUX_TLS
+# endif
+
+# if (defined(GC_DGUX386_THREADS) || defined(GC_OSF1_THREADS) || \
+      defined(GC_DARWIN_THREADS)) && !defined(USE_PTHREAD_SPECIFIC)
+#   define USE_PTHREAD_SPECIFIC
+# endif
+
+# if defined(GC_DGUX386_THREADS) && !defined(_POSIX4A_DRAFT10_SOURCE)
+#   define _POSIX4A_DRAFT10_SOURCE 1
+# endif
+
+# if defined(GC_DGUX386_THREADS) && !defined(_USING_POSIX4A_DRAFT10)
+#   define _USING_POSIX4A_DRAFT10 1
+# endif
+
+# ifdef THREAD_LOCAL_ALLOC
+#   if !defined(USE_PTHREAD_SPECIFIC) && !defined(USE_HPUX_TLS)
+#     include "private/specific.h"
+#   endif
+#   if defined(USE_PTHREAD_SPECIFIC)
+#     define GC_getspecific pthread_getspecific
+#     define GC_setspecific pthread_setspecific
+#     define GC_key_create pthread_key_create
+      typedef pthread_key_t GC_key_t;
+#   endif
+#   if defined(USE_HPUX_TLS)
+#     define GC_getspecific(x) (x)
+#     define GC_setspecific(key, v) ((key) = (v), 0)
+#     define GC_key_create(key, d) 0
+      typedef void * GC_key_t;
+#   endif
+# endif
+# include <stdlib.h>
+# include <pthread.h>
+# include <sched.h>
+# include <time.h>
+# include <errno.h>
+# include <unistd.h>
+# include <sys/mman.h>
+# include <sys/time.h>
+# include <sys/types.h>
+# include <sys/stat.h>
+# include <fcntl.h>
+
+#if defined(GC_DARWIN_THREADS)
+# include "private/darwin_semaphore.h"
+#else
+# include <semaphore.h>
+#endif /* !GC_DARWIN_THREADS */
+
+#if defined(GC_DARWIN_THREADS)
+# include <sys/sysctl.h>
+#endif /* GC_DARWIN_THREADS */
+
+
+
+#if defined(GC_DGUX386_THREADS)
+# include <sys/dg_sys_info.h>
+# include <sys/_int_psem.h>
+  /* sem_t is an uint in DG/UX */
+  typedef unsigned int  sem_t;
+#endif /* GC_DGUX386_THREADS */
+
+#ifndef __GNUC__
+#   define __inline__
+#endif
+
+#ifdef GC_USE_LD_WRAP
+#   define WRAP_FUNC(f) __wrap_##f
+#   define REAL_FUNC(f) __real_##f
+#else
+#   define WRAP_FUNC(f) GC_##f
+#   if !defined(GC_DGUX386_THREADS)
+#     define REAL_FUNC(f) f
+#   else /* GC_DGUX386_THREADS */
+#     define REAL_FUNC(f) __d10_##f
+#   endif /* GC_DGUX386_THREADS */
+#   undef pthread_create
+#   if !defined(GC_DARWIN_THREADS)
+#     undef pthread_sigmask
+#   endif
+#   undef pthread_join
+#   undef pthread_detach
+#   if defined(GC_OSF1_THREADS) && defined(_PTHREAD_USE_MANGLED_NAMES_) \
+       && !defined(_PTHREAD_USE_PTDNAM_)
+/* Restore the original mangled names on Tru64 UNIX.  */
+#     define pthread_create __pthread_create
+#     define pthread_join __pthread_join
+#     define pthread_detach __pthread_detach
+#   endif
+#endif
+
+void GC_thr_init();
+
+static GC_bool parallel_initialized = FALSE;
+
+void GC_init_parallel();
+
+# if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+
+/* We don't really support thread-local allocation with DBG_HDRS_ALL */
+
+#ifdef USE_HPUX_TLS
+  __thread
+#endif
+GC_key_t GC_thread_key;
+
+static GC_bool keys_initialized;
+
+/* Hand the thread-local free lists in fl back to the global array gfl.	*/
+/* The two arrays are indexed differently: gfl has the finer size	*/
+/* resolution, even though not all of its entries are in use.		*/
+/* The caller holds the allocator lock.					*/
+static void return_freelists(ptr_t *fl, ptr_t *gfl)
+{
+    int idx;
+
+    for (idx = 1; idx < NFREELISTS; ++idx) {
+	size_t gfl_idx = idx * (GRANULARITY/sizeof(word));
+	ptr_t head = fl[idx];
+
+	if ((word)head >= HBLKSIZE) {
+	    /* fl[idx] is a real list, not one of the small values	*/
+	    /* below HBLKSIZE.						*/
+	    if (gfl[gfl_idx] != 0) {
+		/* Find the end of the local list and hang the		*/
+		/* existing global list behind it.			*/
+		ptr_t *link = fl + idx;
+		ptr_t cur = head;
+
+		while ((word)cur >= HBLKSIZE) {
+		    link = &(obj_link(cur));
+		    cur = *link;
+		}
+		GC_ASSERT(0 == cur);
+		*link = gfl[gfl_idx];
+	    }
+	    gfl[gfl_idx] = head;
+	}
+	/* The thread structure may outlive this call, so leave a	*/
+	/* value behind that is likely to trap if it is ever used.	*/
+	fl[idx] = (ptr_t)HBLKSIZE;
+    }
+}
+
+/* We statically allocate a single "size 0" object. It is linked to	*/
+/* itself, and is thus repeatedly reused for all size 0 allocation	*/
+/* requests.  (Size 0 gcj allocation requests are incorrect, and	*/
+/* we arrange for those to fault asap.)					*/
+static ptr_t size_zero_object = (ptr_t)(&size_zero_object);
+
+/* Set up the thread-local allocation state of p.  This has to run in	*/
+/* the thread that p describes, with the allocation lock held.		*/
+void GC_init_thread_local(GC_thread p)
+{
+    int k;
+
+    /* The key is created by whichever thread gets here first.		*/
+    if (!keys_initialized) {
+	if (GC_key_create(&GC_thread_key, 0) != 0) {
+	    ABORT("Failed to create key for local allocator");
+	}
+	keys_initialized = TRUE;
+    }
+    if (GC_setspecific(GC_thread_key, p) != 0) {
+	ABORT("Failed to set thread specific allocation pointers");
+    }
+
+    /* Entry 0 is the size 0 free list: it points at the shared	*/
+    /* size_zero_object, except for gcj, which gets (ptr_t)(-1).	*/
+    p -> ptrfree_freelists[0] = (ptr_t)(&size_zero_object);
+    p -> normal_freelists[0] = (ptr_t)(&size_zero_object);
+#   ifdef GC_GCJ_SUPPORT
+	p -> gcj_freelists[0] = (ptr_t)(-1);
+#   endif
+
+    /* Every other entry starts out holding the value 1.		*/
+    for (k = NFREELISTS - 1; k >= 1; --k) {
+	p -> ptrfree_freelists[k] = (ptr_t)1;
+	p -> normal_freelists[k] = (ptr_t)1;
+#	ifdef GC_GCJ_SUPPORT
+	  p -> gcj_freelists[k] = (ptr_t)1;
+#	endif
+    }
+}
+
+#ifdef GC_GCJ_SUPPORT
+  extern ptr_t * GC_gcjobjfreelist;
+#endif
+
+/* Give the thread-local free lists of p back to the global free	*/
+/* lists.  The allocator lock is held by the caller.			*/
+void GC_destroy_thread_local(GC_thread p)
+{
+    /* At present this is reached only from the thread itself or	*/
+    /* from the fork handler running in a child process.		*/
+#   ifndef HANDLE_FORK
+      GC_ASSERT((void *)p == GC_getspecific(GC_thread_key));
+#   endif
+    return_freelists(p->ptrfree_freelists, GC_aobjfreelist);
+    return_freelists(p->normal_freelists, GC_objfreelist);
+#   ifdef GC_GCJ_SUPPORT
+      return_freelists(p->gcj_freelists, GC_gcjobjfreelist);
+#   endif
+}
+
+extern GC_PTR GC_generic_malloc_many();
+
+GC_PTR GC_local_malloc(size_t bytes)
+{   /* Allocate bytes from the calling thread's normal_freelists.
+     * Requests that are not SMALL_ENOUGH go straight to GC_malloc().
+     * Otherwise the free-list entry for the size decides: a value of at
+     * least HBLKSIZE is a list whose first object is unlinked and
+     * returned; a value from 1 to DIRECT_GRANULES is advanced by
+     * index + 1 and the request is passed to GC_malloc(); anything else
+     * refills the list with GC_generic_malloc_many() and retries, or
+     * returns GC_oom_fn(bytes) if the list is still empty.
+     */
+    if (EXPECT(!SMALL_ENOUGH(bytes),0)) {
+        return(GC_malloc(bytes));
+    } else {
+	int index = INDEX_FROM_BYTES(bytes);
+	ptr_t * my_fl;
+	ptr_t my_entry;
+#	if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
+	GC_key_t k = GC_thread_key;
+#	endif
+	void * tsd;
+
+#	if defined(REDIRECT_MALLOC) && !defined(USE_PTHREAD_SPECIFIC)
+	    if (EXPECT(0 == k, 0)) {
+		/* This can happen if we get called when the world is	*/
+		/* being initialized.  Whether we can actually complete	*/
+		/* the initialization then is unclear.			*/
+		GC_init_parallel();
+		k = GC_thread_key;
+	    }
+#	endif
+	tsd = GC_getspecific(GC_thread_key);
+#	ifdef GC_ASSERTIONS
+	  LOCK();
+	  GC_ASSERT(tsd == (void *)GC_lookup_thread(pthread_self()));
+	  UNLOCK();
+#	endif
+	my_fl = ((GC_thread)tsd) -> normal_freelists + index;
+	my_entry = *my_fl;
+	if (EXPECT((word)my_entry >= HBLKSIZE, 1)) {
+	    ptr_t next = obj_link(my_entry);
+	    GC_PTR result = (GC_PTR)my_entry;
+	    *my_fl = next;
+	    obj_link(my_entry) = 0;	/* Clear the link word of the returned object.	*/
+	    PREFETCH_FOR_WRITE(next);
+	    return result;
+	} else if ((word)my_entry - 1 < DIRECT_GRANULES) {
+	    *my_fl = my_entry + index + 1;
+            return GC_malloc(bytes);
+	} else {
+	    GC_generic_malloc_many(BYTES_FROM_INDEX(index), NORMAL, my_fl);
+	    if (*my_fl == 0) return GC_oom_fn(bytes);
+	    return GC_local_malloc(bytes);	/* Retry with the refilled list.	*/
+	}
+    }
+}
+
+/* Allocate bytes from the calling thread's ptrfree_freelists.		*/
+/* Requests that are not SMALL_ENOUGH, and requests made while the	*/
+/* free-list entry is still a small counter, are passed on to		*/
+/* GC_malloc_atomic().							*/
+GC_PTR GC_local_malloc_atomic(size_t bytes)
+{
+    int index;
+    GC_thread self;
+    ptr_t *fl_slot;
+    ptr_t entry;
+
+    if (EXPECT(!SMALL_ENOUGH(bytes), 0)) {
+	return(GC_malloc_atomic(bytes));
+    }
+    index = INDEX_FROM_BYTES(bytes);
+    self = (GC_thread)GC_getspecific(GC_thread_key);
+    fl_slot = self -> ptrfree_freelists + index;
+    entry = *fl_slot;
+
+    if (EXPECT((word)entry >= HBLKSIZE, 1)) {
+	/* Common case: unlink the first object and return it.		*/
+	*fl_slot = obj_link(entry);
+	return (GC_PTR)entry;
+    }
+    if ((word)entry - 1 < DIRECT_GRANULES) {
+	/* Still counting: advance the counter and allocate globally.	*/
+	*fl_slot = entry + index + 1;
+	return GC_malloc_atomic(bytes);
+    }
+    GC_generic_malloc_many(BYTES_FROM_INDEX(index), PTRFREE, fl_slot);
+    /* *fl_slot is updated while the collector is excluded; the	*/
+    /* free list is always visible to the collector as such.		*/
+    if (*fl_slot == 0) return GC_oom_fn(bytes);
+    return GC_local_malloc_atomic(bytes);
+}
+
+#ifdef GC_GCJ_SUPPORT
+
+#include "include/gc_gcj.h"
+
+#ifdef GC_ASSERTIONS
+  extern GC_bool GC_gcj_malloc_initialized;
+#endif
+
+extern int GC_gcj_kind;
+
+GC_PTR GC_local_gcj_malloc(size_t bytes,
+			   void * ptr_to_struct_containing_descr)
+{   /* Allocate bytes from the calling thread's gcj_freelists and store
+     * ptr_to_struct_containing_descr in the first word of the result.
+     * Requests that are not SMALL_ENOUGH, and requests made while the
+     * free-list entry is a value from 1 to DIRECT_GRANULES, are passed
+     * to GC_gcj_malloc().  An entry outside both ranges is refilled with
+     * GC_generic_malloc_many() and the call retried, or GC_oom_fn(bytes)
+     * is returned if the list is still empty.
+     */
+    GC_ASSERT(GC_gcj_malloc_initialized);
+    if (EXPECT(!SMALL_ENOUGH(bytes), 0)) {
+        return GC_gcj_malloc(bytes, ptr_to_struct_containing_descr);
+    } else {
+	int index = INDEX_FROM_BYTES(bytes);
+	ptr_t * my_fl = ((GC_thread)GC_getspecific(GC_thread_key))
+	                -> gcj_freelists + index;
+	ptr_t my_entry = *my_fl;
+	if (EXPECT((word)my_entry >= HBLKSIZE, 1)) {
+	    GC_PTR result = (GC_PTR)my_entry;
+	    GC_ASSERT(!GC_incremental);
+	    /* We assert that any concurrent marker will stop us.	*/
+	    /* Thus it is impossible for a mark procedure to see the 	*/
+	    /* allocation of the next object, but to see this object 	*/
+	    /* still containing a free list pointer.  Otherwise the 	*/
+	    /* marker might find a random "mark descriptor".		*/
+	    *(volatile ptr_t *)my_fl = obj_link(my_entry);
+	    /* We must update the freelist before we store the pointer.	*/
+	    /* Otherwise a GC at this point would see a corrupted	*/
+	    /* free list.						*/
+	    /* A memory barrier is probably never needed, since the 	*/
+	    /* action of stopping this thread will cause prior writes	*/
+	    /* to complete.						*/
+	    GC_ASSERT(((void * volatile *)result)[1] == 0); 
+	    *(void * volatile *)result = ptr_to_struct_containing_descr; 
+	    return result;
+	} else if ((word)my_entry - 1 < DIRECT_GRANULES) {
+	    if (!GC_incremental) *my_fl = my_entry + index + 1;
+	    	/* In the incremental case, we always have to take this */
+	    	/* path.  Thus we leave the counter alone.		*/
+            return GC_gcj_malloc(bytes, ptr_to_struct_containing_descr);
+	} else {
+	    GC_generic_malloc_many(BYTES_FROM_INDEX(index), GC_gcj_kind, my_fl);
+	    if (*my_fl == 0) return GC_oom_fn(bytes);
+	    return GC_local_gcj_malloc(bytes, ptr_to_struct_containing_descr);	/* Retry with the refilled list.	*/
+	}
+    }
+}
+
+#endif /* GC_GCJ_SUPPORT */
+
+# else  /* !THREAD_LOCAL_ALLOC  && !DBG_HDRS_ALL */
+
+#   define GC_destroy_thread_local(t)
+
+# endif /* !THREAD_LOCAL_ALLOC */
+
+#if 0
+/*
+To make sure that we're using LinuxThreads and not some other thread
+package, we generate a dummy reference to `pthread_kill_other_threads_np'
+(was `__pthread_initial_thread_bos' but that disappeared),
+which is a symbol defined in LinuxThreads, but (hopefully) not in other
+thread packages.
+
+We no longer do this, since this code is now portable enough that it might
+actually work for something else.
+*/
+void (*dummy_var_to_force_linux_threads)() = pthread_kill_other_threads_np;
+#endif /* 0 */
+
+long GC_nprocs = 1;	/* Number of processors.  We may not have	*/
+			/* access to all of them, but this is as good	*/
+			/* a guess as any ...				*/
+
+#ifdef PARALLEL_MARK
+
+# ifndef MAX_MARKERS
+#   define MAX_MARKERS 16
+# endif
+
+static ptr_t marker_sp[MAX_MARKERS] = {0};
+
+/* Start routine of a marker helper thread.  id is the helper's index	*/
+/* into marker_sp, passed as a pointer-sized integer.  The routine	*/
+/* never returns: it calls GC_help_marker() in an endless loop.	*/
+void * GC_mark_thread(void * id)
+{
+  word cur_mark_no = 0;
+
+  marker_sp[(word)id] = GC_approx_sp();
+  for (;;) {
+    /* The mark number is handed to GC_help_marker only so that it	*/
+    /* can terminate promptly.  That would matter if it were called	*/
+    /* from the signal handler or from the GC lock acquisition code.	*/
+    /* Under Linux it's not safe to call it from a signal handler,	*/
+    /* since it uses mutexes and condition variables.  Since it is	*/
+    /* called only here, the argument is unnecessary.			*/
+    if (cur_mark_no < GC_mark_no || cur_mark_no > GC_mark_no + 2) {
+	/* We have drifted far away, e.g. because GC_mark_no wrapped;	*/
+	/* resynchronize.						*/
+	cur_mark_no = GC_mark_no;
+    }
+#   ifdef DEBUG_THREADS
+	GC_printf1("Starting mark helper for mark number %ld\n", cur_mark_no);
+#   endif
+    GC_help_marker(cur_mark_no);
+    ++cur_mark_no;
+  }
+}
+
+extern long GC_markers;		/* Number of mark threads we would	*/
+				/* like to have.  Includes the 		*/
+				/* initiating thread.			*/
+
+pthread_t GC_mark_threads[MAX_MARKERS];
+
+#define PTHREAD_CREATE REAL_FUNC(pthread_create)
+
+/* Create the GC_markers - 1 detached helper threads that run		*/
+/* GC_mark_thread(), after limiting GC_markers to MAX_MARKERS.  A	*/
+/* thread that cannot be created only produces a warning.		*/
+static void start_mark_threads()
+{
+    unsigned i;
+    pthread_attr_t attr;
+    int err;
+
+    if (GC_markers > MAX_MARKERS) {
+	WARN("Limiting number of mark threads\n", 0);
+	GC_markers = MAX_MARKERS;
+    }
+    if (0 != pthread_attr_init(&attr)) ABORT("pthread_attr_init failed");
+
+    if (0 != pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_DETACHED))
+	ABORT("pthread_attr_setdetachstate failed");
+
+#   if defined(HPUX) || defined(GC_DGUX386_THREADS)
+      /* Default stack size is usually too small: fix it. */
+      /* Otherwise marker threads or GC may run out of	  */
+      /* space.						  */
+#     define MIN_STACK_SIZE (8*HBLKSIZE*sizeof(word))
+      {
+	size_t old_size;
+
+        if (pthread_attr_getstacksize(&attr, &old_size) != 0)
+	  ABORT("pthread_attr_getstacksize failed\n");
+	if (old_size < MIN_STACK_SIZE) {
+	  if (pthread_attr_setstacksize(&attr, MIN_STACK_SIZE) != 0)
+		  ABORT("pthread_attr_setstacksize failed\n");
+	}
+      }
+#   endif /* HPUX || GC_DGUX386_THREADS */
+#   ifdef CONDPRINT
+      if (GC_print_stats) {
+	GC_printf1("Starting %ld marker threads\n", GC_markers - 1);
+      }
+#   endif
+    for (i = 0; i < GC_markers - 1; ++i) {
+      /* pthread_create reports failure through its return value and	*/
+      /* does not set errno, so the return value is what we print.	*/
+      err = PTHREAD_CREATE(GC_mark_threads + i, &attr,
+			   GC_mark_thread, (void *)(word)i);
+      if (0 != err) {
+	WARN("Marker thread creation failed, errno = %ld.\n", err);
+      }
+    }
+}
+
+#else  /* !PARALLEL_MARK */
+
+/* Without PARALLEL_MARK there are no helper threads to start.		*/
+static __inline__ void start_mark_threads() { }
+
+#endif /* !PARALLEL_MARK */
+
+/* Defining INSTALL_LOOPING_SEGV_HANDLER causes SIGSEGV and SIGBUS to 	*/
+/* result in an infinite loop in a signal handler.  This can be very	*/
+/* useful for debugging, since (as of RH7) gdb still seems to have	*/
+/* serious problems with threads.					*/
+#ifdef INSTALL_LOOPING_SEGV_HANDLER
+/* Signal handler that prints the signal number, thread and process	*/
+/* id, and then spins forever instead of returning.			*/
+void GC_looping_handler(int sig)
+{
+    GC_printf3("Signal %ld in thread %lx, pid %ld\n",
+	       sig, pthread_self(), getpid());
+    while (1) {
+	/* Deliberately empty. */
+    }
+}
+#endif
+
+GC_bool GC_thr_initialized = FALSE;
+
+volatile GC_thread GC_threads[THREAD_TABLE_SZ];
+
+/* Push the memory occupied by the thread table, and by the		*/
+/* thread-local allocation key when that is in use, by handing each	*/
+/* [start, end) range to GC_push_all().				*/
+void GC_push_thread_structures GC_PROTO((void))
+{
+    GC_push_all((ptr_t)(GC_threads), (ptr_t)(GC_threads)+sizeof(GC_threads));
+#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+      /* The range must cover the key object itself, so its length is	*/
+      /* sizeof(GC_thread_key), not the size of a pointer to it.	*/
+      GC_push_all((ptr_t)(&GC_thread_key),
+	  (ptr_t)(&GC_thread_key)+sizeof(GC_thread_key));
+#   endif
+}
+
+#ifdef THREAD_LOCAL_ALLOC
+/* The ptrfree and gcj free lists have to be marked explicitly,	*/
+/* because their links would not be found otherwise.  The normal free	*/
+/* lists get the same treatment, since that touches less memory than	*/
+/* scanning them normally.						*/
+void GC_mark_thread_local_free_lists(void)
+{
+    int bucket, sz;
+    GC_thread t;
+    ptr_t head;
+
+    for (bucket = 0; bucket < THREAD_TABLE_SZ; ++bucket) {
+      t = GC_threads[bucket];
+      while (0 != t) {
+	for (sz = 1; sz < NFREELISTS; ++sz) {
+	  /* Only values above HBLKSIZE are treated as real lists.	*/
+	  head = t -> ptrfree_freelists[sz];
+	  if ((word)head > HBLKSIZE) {
+	    GC_set_fl_marks(head);
+	  }
+	  head = t -> normal_freelists[sz];
+	  if ((word)head > HBLKSIZE) {
+	    GC_set_fl_marks(head);
+	  }
+#	  ifdef GC_GCJ_SUPPORT
+	    head = t -> gcj_freelists[sz];
+	    if ((word)head > HBLKSIZE) {
+	      GC_set_fl_marks(head);
+	    }
+#	  endif /* GC_GCJ_SUPPORT */
+	}
+	t = t -> next;
+      }
+    }
+}
+#endif /* THREAD_LOCAL_ALLOC */
+
+static struct GC_Thread_Rep first_thread;
+
+/* Insert a record for thread id at the head of its GC_threads bucket	*/
+/* and return it, or return 0 if the record could not be allocated.	*/
+/* The thread is assumed not to be in the table yet.			*/
+/* The caller holds the allocation lock.				*/
+GC_thread GC_new_thread(pthread_t id)
+{
+    static GC_bool first_thread_used = FALSE;
+    int bucket = ((word)id) % THREAD_TABLE_SZ;
+    GC_thread t;
+
+    if (first_thread_used) {
+	t = (struct GC_Thread_Rep *)
+	    GC_INTERNAL_MALLOC(sizeof(struct GC_Thread_Rep), NORMAL);
+	if (0 == t) return(0);
+    } else {
+	/* The very first record is the statically allocated one.	*/
+	t = &first_thread;
+	first_thread_used = TRUE;
+    }
+    t -> id = id;
+    t -> next = GC_threads[bucket];
+    GC_threads[bucket] = t;
+    GC_ASSERT(t -> flags == 0 && t -> thread_blocked == 0);
+    return(t);
+}
+
+/* Delete a thread from GC_threads.  We assume it is there.	*/
+/* (The code intentionally traps if it wasn't.)			*/
+/* The first entry on the hash chain whose id matches is	*/
+/* unlinked and its storage is released, except when it is	*/
+/* the statically allocated first_thread, which did not come	*/
+/* from GC_INTERNAL_MALLOC and so must not be freed.		*/
+/* Caller holds allocation lock.				*/
+void GC_delete_thread(pthread_t id)
+{
+    int hv = ((word)id) % THREAD_TABLE_SZ;
+    register GC_thread p = GC_threads[hv];
+    register GC_thread prev = 0;
+    
+    while (!pthread_equal(p -> id, id)) {
+        prev = p;
+        p = p -> next;
+    }
+    if (prev == 0) {
+        GC_threads[hv] = p -> next;
+    } else {
+        prev -> next = p -> next;
+    }
+    /* Same guard as in GC_remove_all_threads_but_me().		*/
+    if (p != &first_thread) GC_INTERNAL_FREE(p);
+}
+
+/* If a thread has been joined, but we have not yet		*/
+/* been notified, then there may be more than one thread 	*/
+/* in the table with the same pthread id.			*/
+/* This is OK, but we need a way to delete a specific one.	*/
+/* The entry gc_id is located by pointer identity on the hash	*/
+/* chain selected by id, unlinked, and its storage released	*/
+/* unless it is the statically allocated first_thread, which	*/
+/* did not come from GC_INTERNAL_MALLOC.			*/
+void GC_delete_gc_thread(pthread_t id, GC_thread gc_id)
+{
+    int hv = ((word)id) % THREAD_TABLE_SZ;
+    register GC_thread p = GC_threads[hv];
+    register GC_thread prev = 0;
+
+    while (p != gc_id) {
+        prev = p;
+        p = p -> next;
+    }
+    if (prev == 0) {
+        GC_threads[hv] = p -> next;
+    } else {
+        prev -> next = p -> next;
+    }
+    /* Same guard as in GC_remove_all_threads_but_me().		*/
+    if (p != &first_thread) GC_INTERNAL_FREE(p);
+}
+
+/* Find the GC_thread entry registered for the pthread id.	*/
+/* The result is 0 when no entry matches.			*/
+/* The caller must hold the allocation lock or otherwise	*/
+/* prevent updates of the table.				*/
+/* New entries are linked at the head of a chain, so when	*/
+/* several entries carry the same id the most recently added	*/
+/* one is returned.						*/
+GC_thread GC_lookup_thread(pthread_t id)
+{
+    GC_thread cur;
+
+    for (cur = GC_threads[((word)id) % THREAD_TABLE_SZ];
+	 cur != 0; cur = cur -> next) {
+	if (pthread_equal(cur -> id, id)) break;
+    }
+    return(cur);
+}
+
+#ifdef HANDLE_FORK
+/* Remove all entries from the GC_threads table, except the	*/
+/* one for the current thread.  We need to do this in the child	*/
+/* process after a fork(), since only the current thread 	*/
+/* survives in the child.					*/
+/* Removed entries are freed unless they are the statically	*/
+/* allocated first_thread; with THREAD_LOCAL_ALLOC the thread-	*/
+/* local state of entries not flagged FINISHED is destroyed	*/
+/* first.							*/
+void GC_remove_all_threads_but_me(void)
+{
+    pthread_t self = pthread_self();
+    int hv;
+    GC_thread p, next, me;
+
+    for (hv = 0; hv < THREAD_TABLE_SZ; ++hv) {
+      me = 0;
+      for (p = GC_threads[hv]; 0 != p; p = next) {
+	/* Fetch the successor first: p may be freed below.	*/
+	next = p -> next;
+	/* pthread_t is an opaque type; compare ids with	*/
+	/* pthread_equal(), as the other table routines do.	*/
+	if (pthread_equal(p -> id, self)) {
+	  me = p;
+	  p -> next = 0;
+	} else {
+#	  ifdef THREAD_LOCAL_ALLOC
+	    if (!(p -> flags & FINISHED)) {
+	      GC_destroy_thread_local(p);
+	    }
+#	  endif /* THREAD_LOCAL_ALLOC */
+	  if (p != &first_thread) GC_INTERNAL_FREE(p);
+	}
+      }
+      /* The chain now holds our own entry, or nothing.	*/
+      GC_threads[hv] = me;
+    }
+}
+#endif /* HANDLE_FORK */
+
+#ifdef USE_PROC_FOR_LIBRARIES
+/* Return 1 if the stack end recorded for some registered thread (or,	*/
+/* with PARALLEL_MARK, some marker_sp entry) lies inside the segment	*/
+/* bounded by lo and hi, and 0 otherwise.  Thread entries whose	*/
+/* stack_end is 0 are ignored.						*/
+int GC_segment_is_thread_stack(ptr_t lo, ptr_t hi)
+{
+    int i;
+    GC_thread p;
+    
+#   ifdef PARALLEL_MARK
+      for (i = 0; i < GC_markers; ++i) {
+	/* Logical conjunction, as in the stack_end tests below.	*/
+	if (marker_sp[i] > lo && marker_sp[i] < hi) return 1;
+      }
+#   endif
+    for (i = 0; i < THREAD_TABLE_SZ; i++) {
+      for (p = GC_threads[i]; p != 0; p = p -> next) {
+	if (0 != p -> stack_end) {
+#	  ifdef STACK_GROWS_UP
+            if (p -> stack_end >= lo && p -> stack_end < hi) return 1;
+#	  else /* STACK_GROWS_DOWN */
+            if (p -> stack_end > lo && p -> stack_end <= hi) return 1;
+#	  endif
+	}
+      }
+    }
+    return 0;
+}
+#endif /* USE_PROC_FOR_LIBRARIES */
+
+#ifdef GC_LINUX_THREADS
+/* Return the number of processors, or i<= 0 if it can't be determined.	*/
+/* The count is one more than the highest <n> seen on a "cpu<n>" line	*/
+/* of /proc/stat (at least 1); -1 is returned when the file cannot be	*/
+/* opened or fewer than 100 bytes could be read from it.		*/
+int GC_get_nprocs()
+{
+    /* Should be "return sysconf(_SC_NPROCESSORS_ONLN);" but that	*/
+    /* appears to be buggy in many cases.				*/
+    /* We look for lines "cpu<n>" in /proc/stat.			*/
+#   define STAT_BUF_SIZE 4096
+#   define STAT_READ read
+	/* If read is wrapped, this may need to be redefined to call 	*/
+	/* the real one.						*/
+    char stat_buf[STAT_BUF_SIZE];
+    int f;
+    int result = 1;
+	/* Some old kernels only have a single "cpu nnnn ..."	*/
+	/* entry in /proc/stat.  We identify those as 		*/
+	/* uniprocessors.					*/
+    int i, len;
+
+    f = open("/proc/stat", O_RDONLY);
+    if (f < 0) {
+	WARN("Couldn't read /proc/stat\n", 0);
+	return -1;
+    }
+    /* Keep the byte count signed: a failing read yields -1, which	*/
+    /* must be caught by the length test below instead of turning	*/
+    /* into a huge unsigned length.					*/
+    len = (int)STAT_READ(f, stat_buf, STAT_BUF_SIZE);
+    /* Everything we need is in stat_buf now, so release the		*/
+    /* descriptor before any return, including the failure path.	*/
+    close(f);
+    if (len < 100) {
+	WARN("Couldn't read /proc/stat\n", 0);
+	return -1;
+    }
+    for (i = 0; i < len - 100; ++i) {
+        if (stat_buf[i] == '\n' && stat_buf[i+1] == 'c'
+	    && stat_buf[i+2] == 'p' && stat_buf[i+3] == 'u') {
+	    int cpu_no = atoi(stat_buf + i + 4);
+	    if (cpu_no >= result) result = cpu_no + 1;
+	}
+    }
+    return result;
+}
+#endif /* GC_LINUX_THREADS */
+
+/* We hold the GC lock.  Wait until an in-progress GC has finished.	*/
+/* Repeatedly RELEASES GC LOCK in order to wait.			*/
+/* If wait_for_all is true, then we exit with the GC lock held and no	*/
+/* collection in progress; otherwise we just wait for the current GC	*/
+/* to finish.								*/
+/* Nothing is done unless GC_incremental is set and a collection is	*/
+/* in progress on entry.						*/
+extern GC_bool GC_collection_in_progress();
+void GC_wait_for_gc_completion(GC_bool wait_for_all)
+{
+    if (GC_incremental && GC_collection_in_progress()) {
+	/* Collection counter on entry.  When wait_for_all is false	*/
+	/* the loop below ends as soon as GC_gc_no differs from it.	*/
+	int old_gc_no = GC_gc_no;
+
+	/* Make sure that no part of our stack is still on the mark stack, */
+	/* since it's about to be unmapped.				   */
+	while (GC_incremental && GC_collection_in_progress()
+	       && (wait_for_all || old_gc_no == GC_gc_no)) {
+	    /* Do some collection work ourselves, then drop the lock	*/
+	    /* and yield before re-acquiring it and checking again.	*/
+	    ENTER_GC();
+            GC_collect_a_little_inner(1);
+	    EXIT_GC();
+	    UNLOCK();
+	    sched_yield();
+	    LOCK();
+	}
+    }
+}
+
+#ifdef HANDLE_FORK
+/* Procedures called before and after a fork.  The goal here is to make */
+/* it safe to call GC_malloc() in a forked child.  It's unclear that is	*/
+/* attainable, since the single UNIX spec seems to imply that one 	*/
+/* should only call async-signal-safe functions, and we probably can't	*/
+/* quite guarantee that.  But we give it our best shot.  (That same	*/
+/* spec also implies that it's not safe to call the system malloc	*/
+/* between fork() and exec().  Thus we're doing no worse than it.)	*/
+
+/* Called before a fork().  Returns with the allocation lock held and,	*/
+/* when PARALLEL_MARK or THREAD_LOCAL_ALLOC is defined, the mark lock	*/
+/* held as well; GC_fork_parent_proc() and GC_fork_child_proc()	*/
+/* release them again.							*/
+void GC_fork_prepare_proc(void)
+{
+    /* Acquire all relevant locks, so that after releasing the locks	*/
+    /* the child will see a consistent state in which monitor 		*/
+    /* invariants hold.	 Unfortunately, we can't acquire libc locks	*/
+    /* we might need, and there seems to be no guarantee that libc	*/
+    /* must install a suitable fork handler.				*/
+    /* Wait for an ongoing GC to finish, since we can't finish it in	*/
+    /* the (one remaining thread in) the child.				*/
+      LOCK();
+#     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
+        GC_wait_for_reclaim();
+#     endif
+      GC_wait_for_gc_completion(TRUE);
+#     if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
+        GC_acquire_mark_lock();
+#     endif
+}
+
+/* Called in parent after a fork().  Releases the locks taken by	*/
+/* GC_fork_prepare_proc(), in the reverse order of acquisition.	*/
+void GC_fork_parent_proc(void)
+{
+#   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
+      GC_release_mark_lock();
+#   endif
+    UNLOCK();
+}
+
+/* Called in child after a fork().  Releases the mark lock, reduces	*/
+/* the thread table to the calling thread, switches parallel marking	*/
+/* off (PARALLEL_MARK only) and finally releases the allocation lock.	*/
+void GC_fork_child_proc(void)
+{
+    /* Clean up the thread table, so that just our thread is left. */
+#   if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
+      GC_release_mark_lock();
+#   endif
+    GC_remove_all_threads_but_me();
+#   ifdef PARALLEL_MARK
+      /* Turn off parallel marking in the child, since we are probably 	*/
+      /* just going to exec, and we would have to restart mark threads.	*/
+        GC_markers = 1;
+        GC_parallel = FALSE;
+#   endif /* PARALLEL_MARK */
+    UNLOCK();
+}
+#endif /* HANDLE_FORK */
+
+#if defined(GC_DGUX386_THREADS)
+/* Report the processor count obtained from dg_sys_info(); the value	*/
+/* is -1 when that call fails, so a result <= 0 means "unknown".	*/
+int GC_get_nprocs()
+{
+    /* <takis@XFree86.Org> */
+    struct dg_sys_info_pm_info pm_info;
+    int cpu_count = -1;		/* stays -1 if the query fails	*/
+
+    if (dg_sys_info((long int *) &pm_info,
+		    DG_SYS_INFO_PM_INFO_TYPE,
+		    DG_SYS_INFO_PM_CURRENT_VERSION) >= 0) {
+      /* Active CPUs */
+      cpu_count = pm_info.idle_vp_count;
+    }
+
+#  ifdef DEBUG_THREADS
+    GC_printf1("Number of active CPUs in this system: %d\n", cpu_count);
+#  endif
+    return(cpu_count);
+}
+#endif /* GC_DGUX386_THREADS */
+
+/* One-time initialization of thread support; later calls return	*/
+/* immediately.  Registers the fork handlers (HANDLE_FORK), adds the	*/
+/* calling thread to the thread table flagged DETACHED | MAIN_THREAD,	*/
+/* calls GC_stop_init(), and determines GC_nprocs and, with		*/
+/* PARALLEL_MARK, GC_markers and GC_parallel.				*/
+/* We hold the allocation lock.	*/
+void GC_thr_init()
+{
+#	ifndef GC_DARWIN_THREADS
+        int dummy;
+		/* Only its address is used, as an initial stack	*/
+		/* pointer value for the calling thread.		*/
+#	endif
+    GC_thread t;
+
+    if (GC_thr_initialized) return;
+    GC_thr_initialized = TRUE;
+    
+#   ifdef HANDLE_FORK
+      /* Prepare for a possible fork.	*/
+        pthread_atfork(GC_fork_prepare_proc, GC_fork_parent_proc,
+	  	       GC_fork_child_proc);
+#   endif /* HANDLE_FORK */
+    /* Add the initial thread, so we can stop it.	*/
+      t = GC_new_thread(pthread_self());
+#     ifdef GC_DARWIN_THREADS
+         t -> stop_info.mach_thread = mach_thread_self();
+#     else
+         t -> stop_info.stack_ptr = (ptr_t)(&dummy);
+#     endif
+      t -> flags = DETACHED | MAIN_THREAD;
+
+    GC_stop_init();
+
+    /* Set GC_nprocs.  */
+      {
+	/* A GC_NPROCS environment setting, if positive, takes		*/
+	/* precedence over the platform-specific queries below.	*/
+	char * nprocs_string = GETENV("GC_NPROCS");
+	GC_nprocs = -1;
+	if (nprocs_string != NULL) GC_nprocs = atoi(nprocs_string);
+      }
+      if (GC_nprocs <= 0) {
+#       if defined(GC_HPUX_THREADS)
+	  GC_nprocs = pthread_num_processors_np();
+#       endif
+#	if defined(GC_OSF1_THREADS)
+	  GC_nprocs = sysconf(_SC_NPROCESSORS_ONLN);
+	  if (GC_nprocs <= 0) GC_nprocs = 1;
+#	endif
+#       if defined(GC_FREEBSD_THREADS)
+          GC_nprocs = 1;
+#       endif
+#       if defined(GC_DARWIN_THREADS)
+	  int ncpus = 1;
+	  size_t len = sizeof(ncpus);
+	  sysctl((int[2]) {CTL_HW, HW_NCPU}, 2, &ncpus, &len, NULL, 0);
+	  GC_nprocs = ncpus;
+#       endif
+#	if defined(GC_LINUX_THREADS) || defined(GC_DGUX386_THREADS)
+          GC_nprocs = GC_get_nprocs();
+#	endif
+      }
+      if (GC_nprocs <= 0) {
+	/* The count is still unknown: warn, assume two processors	*/
+	/* and (with PARALLEL_MARK) use a single marker.		*/
+	WARN("GC_get_nprocs() returned %ld\n", GC_nprocs);
+	GC_nprocs = 2;
+#	ifdef PARALLEL_MARK
+	  GC_markers = 1;
+#	endif
+      } else {
+#	ifdef PARALLEL_MARK
+          {
+	    /* GC_MARKERS, when set, overrides the default of one	*/
+	    /* marker per processor.  The value is not validated	*/
+	    /* here.							*/
+	    char * markers_string = GETENV("GC_MARKERS");
+	    if (markers_string != NULL) {
+	      GC_markers = atoi(markers_string);
+	    } else {
+	      GC_markers = GC_nprocs;
+	    }
+          }
+#	endif
+      }
+#   ifdef PARALLEL_MARK
+#     ifdef CONDPRINT
+        if (GC_print_stats) {
+          GC_printf2("Number of processors = %ld, "
+		 "number of marker threads = %ld\n", GC_nprocs, GC_markers);
+	}
+#     endif
+      if (GC_markers == 1) {
+	GC_parallel = FALSE;
+#	ifdef CONDPRINT
+	  if (GC_print_stats) {
+	    GC_printf0("Single marker thread, turning off parallel marking\n");
+	  }
+#	endif
+      } else {
+	GC_parallel = TRUE;
+	/* Disable true incremental collection, but generational is OK.	*/
+	GC_time_limit = GC_TIME_UNLIMITED;
+      }
+#   endif
+}
+
+
+/* Perform all initializations, including those that	*/
+/* may require allocation.				*/
+/* Called without allocation lock.			*/
+/* Must be called before a second thread is created.	*/
+/* Only the first call does any work; later calls	*/
+/* return immediately.					*/
+void GC_init_parallel()
+{
+    if (parallel_initialized) return;
+    parallel_initialized = TRUE;
+
+    /* GC_init() calls us back, so set flag first.	*/
+    if (!GC_is_initialized) GC_init();
+    /* If we are using a parallel marker, start the helper threads.  */
+#     ifdef PARALLEL_MARK
+        if (GC_parallel) start_mark_threads();
+#     endif
+    /* Initialize thread local free lists if used.	*/
+#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+      LOCK();
+      GC_init_thread_local(GC_lookup_thread(pthread_self()));
+      UNLOCK();
+#   endif
+}
+
+
+#if !defined(GC_DARWIN_THREADS)
+/* Wrapper for pthread_sigmask().  When the caller supplies a set with	*/
+/* SIG_BLOCK or SIG_SETMASK, SIG_SUSPEND is removed from a private	*/
+/* copy of that set before the real function is called; every other	*/
+/* request is passed through unchanged.					*/
+int WRAP_FUNC(pthread_sigmask)(int how, const sigset_t *set, sigset_t *oset)
+{
+    sigset_t filtered;
+    const sigset_t *effective = set;
+
+    if (NULL != set) {
+        switch (how) {
+	    case SIG_BLOCK:
+	    case SIG_SETMASK:
+		/* Work on a copy; the caller's set is const.	*/
+		filtered = *set;
+		sigdelset(&filtered, SIG_SUSPEND);
+		effective = &filtered;
+		break;
+	    default:
+		break;
+        }
+    }
+    return(REAL_FUNC(pthread_sigmask)(how, effective, oset));
+}
+#endif /* !GC_DARWIN_THREADS */
+
+/* Wrappers for functions that are likely to block for an appreciable	*/
+/* length of time.  Must be called in pairs, if at all.			*/
+/* Nothing much beyond the system call itself should be executed	*/
+/* between these.							*/
+
+/* Mark the calling thread as blocked.  Under the allocation lock this	*/
+/* records a current stack pointer for the thread (moved by SP_SLOP	*/
+/* bytes in the direction of stack growth; not done for			*/
+/* GC_DARWIN_THREADS) and sets thread_blocked.				*/
+/* Must be paired with GC_end_blocking().				*/
+void GC_start_blocking(void) {
+#   define SP_SLOP 128
+    GC_thread me;
+    LOCK();
+    me = GC_lookup_thread(pthread_self());
+    GC_ASSERT(!(me -> thread_blocked));
+#   ifdef SPARC
+	me -> stop_info.stack_ptr = (ptr_t)GC_save_regs_in_stack();
+#   else
+#   ifndef GC_DARWIN_THREADS
+	me -> stop_info.stack_ptr = (ptr_t)GC_approx_sp();
+#   endif
+#   endif
+#   ifdef IA64
+	me -> backing_store_ptr = (ptr_t)GC_save_regs_in_stack() + SP_SLOP;
+#   endif
+    /* Add some slop to the stack pointer, since the wrapped call may 	*/
+    /* end up pushing more callee-save registers.			*/
+#   ifndef GC_DARWIN_THREADS
+#   ifdef STACK_GROWS_UP
+	me -> stop_info.stack_ptr += SP_SLOP;
+#   else
+	me -> stop_info.stack_ptr -= SP_SLOP;
+#   endif
+#   endif
+    me -> thread_blocked = TRUE;
+    UNLOCK();
+}
+
+/* Counterpart of GC_start_blocking(): clears the thread_blocked flag	*/
+/* of the calling thread while holding the allocation lock.		*/
+void GC_end_blocking(void) {
+    GC_thread me;
+    LOCK();   /* This will block if the world is stopped.	*/
+    me = GC_lookup_thread(pthread_self());
+    GC_ASSERT(me -> thread_blocked);
+    me -> thread_blocked = FALSE;
+    UNLOCK();
+}
+    
+/* On DG/UX, __d10_sleep is made an alias for sleep.			*/
+/* NOTE(review): presumably so that the wrapper below resolves to the	*/
+/* right symbol on that platform -- confirm against the WRAP_FUNC /	*/
+/* REAL_FUNC definitions.						*/
+#if defined(GC_DGUX386_THREADS)
+#define __d10_sleep sleep
+#endif /* GC_DGUX386_THREADS */
+
+/* A wrapper for the standard C sleep function.  The real call is	*/
+/* bracketed by GC_start_blocking() / GC_end_blocking(), and its	*/
+/* result is returned unchanged.					*/
+int WRAP_FUNC(sleep) (unsigned int seconds)
+{
+    int result;
+
+    GC_start_blocking();
+    result = REAL_FUNC(sleep)(seconds);
+    GC_end_blocking();
+    return result;
+}
+
+/* Arguments handed from the pthread_create() wrapper to		*/
+/* GC_start_routine() in the newly created thread.			*/
+struct start_info {
+    void *(*start_routine)(void *);	/* Client's thread function.	*/
+    void *arg;				/* Argument passed to it.	*/
+    word flags;				/* Initial flags for the new	*/
+					/* thread's table entry.	*/
+    sem_t registered;   	/* 1 ==> in our thread table, but 	*/
+				/* parent hasn't yet noticed.		*/
+};
+
+/* Called at thread exit.				*/
+/* Never called for main thread.  That's OK, since it	*/
+/* results in at most a tiny one-time leak.  And 	*/
+/* linuxthreads doesn't reclaim the main threads 	*/
+/* resources or id anyway.				*/
+/* The arg parameter is not used.			*/
+void GC_thread_exit_proc(void *arg)
+{
+    GC_thread me;
+
+    LOCK();
+    me = GC_lookup_thread(pthread_self());
+    GC_destroy_thread_local(me);
+    /* A detached thread will never be joined, so its entry is	*/
+    /* removed right away.  Otherwise the entry is kept and only	*/
+    /* flagged FINISHED; the pthread_join() / pthread_detach()	*/
+    /* wrappers delete it later.					*/
+    if (me -> flags & DETACHED) {
+    	GC_delete_thread(pthread_self());
+    } else {
+	me -> flags |= FINISHED;
+    }
+#   if defined(THREAD_LOCAL_ALLOC) && !defined(USE_PTHREAD_SPECIFIC) \
+       && !defined(USE_HPUX_TLS) && !defined(DBG_HDRS_ALL)
+      GC_remove_specific(GC_thread_key);
+#   endif
+    /* Wait only for the collection currently in progress, if any.	*/
+    GC_wait_for_gc_completion(FALSE);
+    UNLOCK();
+}
+
+/* Wrapper for pthread_join().  The table entry of the target thread	*/
+/* is looked up before the real join, and that specific entry is	*/
+/* deleted once the join has succeeded.  Returns the result of the	*/
+/* real call (with EINTR mapped to 0 on FreeBSD).			*/
+int WRAP_FUNC(pthread_join)(pthread_t thread, void **retval)
+{
+    int result;
+    GC_thread thread_gc_id;
+    
+    LOCK();
+    thread_gc_id = GC_lookup_thread(thread);
+    /* This is guaranteed to be the intended one, since the thread id	*/
+    /* can't have been recycled by pthreads.				*/
+    UNLOCK();
+    result = REAL_FUNC(pthread_join)(thread, retval);
+# if defined (GC_FREEBSD_THREADS)
+    /* On FreeBSD, the wrapped pthread_join() sometimes returns (what
+       appears to be) a spurious EINTR which caused the test and real code
+       to gratuitously fail.  Having looked at system pthread library source
+       code, I see how this return code may be generated.  In one path of
+       code, pthread_join() just returns the errno setting of the thread
+       being joined.  This does not match the POSIX specification or the
+       local man pages thus I have taken the liberty to catch this one
+       spurious return value properly conditionalized on GC_FREEBSD_THREADS. */
+    if (result == EINTR) result = 0;
+# endif
+    if (result == 0) {
+        LOCK();
+        /* Here the pthread thread id may have been recycled. */
+        GC_delete_gc_thread(thread, thread_gc_id);
+        UNLOCK();
+    }
+    return result;
+}
+
+/* Wrapper for pthread_detach().  After a successful real call the	*/
+/* target's table entry is flagged DETACHED; if the thread has already	*/
+/* finished, the entry is deleted here.  Returns the result of the	*/
+/* real call.								*/
+int
+WRAP_FUNC(pthread_detach)(pthread_t thread)
+{
+    int result;
+    GC_thread thread_gc_id;
+    
+    LOCK();
+    /* Look the entry up before detaching, while the id still	*/
+    /* identifies the intended thread.				*/
+    thread_gc_id = GC_lookup_thread(thread);
+    UNLOCK();
+    result = REAL_FUNC(pthread_detach)(thread);
+    if (result == 0) {
+      LOCK();
+      thread_gc_id -> flags |= DETACHED;
+      /* Here the pthread thread id may have been recycled. */
+      if (thread_gc_id -> flags & FINISHED) {
+        GC_delete_gc_thread(thread, thread_gc_id);
+      }
+      UNLOCK();
+    }
+    return result;
+}
+
+/* Start function handed to the real pthread_create() by the wrapper;	*/
+/* arg points to the struct start_info prepared by the creating	*/
+/* thread.  Registers the new thread in the thread table, records an	*/
+/* approximate stack end, signals the creator through si->registered,	*/
+/* and then runs the client's start routine with GC_thread_exit_proc	*/
+/* installed as a cleanup handler.  Returns the client's result.	*/
+void * GC_start_routine(void * arg)
+{
+    int dummy;
+    struct start_info * si = arg;
+    void * result;
+    GC_thread me;
+    pthread_t my_pthread;
+    void *(*start)(void *);
+    void *start_arg;
+
+    my_pthread = pthread_self();
+#   ifdef DEBUG_THREADS
+        GC_printf1("Starting thread 0x%lx\n", my_pthread);
+        GC_printf1("pid = %ld\n", (long) getpid());
+        GC_printf1("sp = 0x%lx\n", (long) &arg);
+#   endif
+    LOCK();
+    me = GC_new_thread(my_pthread);
+    /* GC_new_thread() returns 0 when no entry could be allocated;	*/
+    /* stop with a diagnostic instead of dereferencing a null pointer.	*/
+    if (0 == me) ABORT("Failed to allocate memory for thread registering");
+#ifdef GC_DARWIN_THREADS
+    me -> stop_info.mach_thread = mach_thread_self();
+#else
+    me -> stop_info.stack_ptr = 0;
+#endif
+    me -> flags = si -> flags;
+    /* me -> stack_end = GC_linux_stack_base(); -- currently (11/99)	*/
+    /* doesn't work because the stack base in /proc/self/stat is the 	*/
+    /* one for the main thread.  There is a strong argument that that's	*/
+    /* a kernel bug, but a pervasive one.				*/
+#   ifdef STACK_GROWS_DOWN
+      me -> stack_end = (ptr_t)(((word)(&dummy) + (GC_page_size - 1))
+		                & ~(GC_page_size - 1));
+#	  ifndef GC_DARWIN_THREADS
+        me -> stop_info.stack_ptr = me -> stack_end - 0x10;
+#	  endif
+	/* Needs to be plausible, since an asynchronous stack mark	*/
+	/* should not crash.						*/
+#   else
+      me -> stack_end = (ptr_t)((word)(&dummy) & ~(GC_page_size - 1));
+      me -> stop_info.stack_ptr = me -> stack_end + 0x10;
+#   endif
+    /* This is dubious, since we may be more than a page into the stack, */
+    /* and hence skip some of it, though it's not clear that matters.	 */
+#   ifdef IA64
+      me -> backing_store_end = (ptr_t)
+			(GC_save_regs_in_stack() & ~(GC_page_size - 1));
+      /* This is also < 100% convincing.  We should also read this 	*/
+      /* from /proc, but the hook to do so isn't there yet.		*/
+#   endif /* IA64 */
+    UNLOCK();
+    /* Copy everything we need out of *si before posting the		*/
+    /* semaphore: the creator frees si once it has been woken.		*/
+    start = si -> start_routine;
+#   ifdef DEBUG_THREADS
+	GC_printf1("start_routine = 0x%lx\n", start);
+#   endif
+    start_arg = si -> arg;
+    sem_post(&(si -> registered));	/* Last action on si.	*/
+    					/* OK to deallocate.	*/
+    pthread_cleanup_push(GC_thread_exit_proc, 0);
+#   if defined(THREAD_LOCAL_ALLOC) && !defined(DBG_HDRS_ALL)
+ 	LOCK();
+        GC_init_thread_local(me);
+	UNLOCK();
+#   endif
+    result = (*start)(start_arg);
+    /* Tested with #ifdef like every other DEBUG_THREADS section, and	*/
+    /* printed with the same %lx conversion as the start message.	*/
+#   ifdef DEBUG_THREADS
+        GC_printf1("Finishing thread 0x%lx\n", pthread_self());
+#   endif
+    me -> status = result;
+    pthread_cleanup_pop(1);
+    /* Cleanup acquires lock, ensuring that we can't exit		*/
+    /* while a collection that thinks we're alive is trying to stop     */
+    /* us.								*/
+    return(result);
+}
+
+/* Wrapper for pthread_create().  The client's start routine and	*/
+/* argument are packaged in a collector-allocated struct start_info	*/
+/* and the real thread is started in GC_start_routine().  The call	*/
+/* does not return until the child has registered itself in the	*/
+/* thread table.  Returns ENOMEM if the start_info cannot be		*/
+/* allocated, otherwise the result of the real pthread_create().	*/
+int
+WRAP_FUNC(pthread_create)(pthread_t *new_thread,
+		  const pthread_attr_t *attr,
+                  void *(*start_routine)(void *), void *arg)
+{
+    int result;
+    int detachstate;
+    word my_flags = 0;
+    struct start_info * si; 
+	/* This is otherwise saved only in an area mmapped by the thread */
+	/* library, which isn't visible to the collector.		 */
+ 
+    /* We resist the temptation to muck with the stack size here,	*/
+    /* even if the default is unreasonably small.  That's the client's	*/
+    /* responsibility.							*/
+
+    LOCK();
+    si = (struct start_info *)GC_INTERNAL_MALLOC(sizeof(struct start_info),
+						 NORMAL);
+    UNLOCK();
+    if (!parallel_initialized) GC_init_parallel();
+    if (0 == si) return(ENOMEM);
+    sem_init(&(si -> registered), 0, 0);
+    si -> start_routine = start_routine;
+    si -> arg = arg;
+    LOCK();
+    if (!GC_thr_initialized) GC_thr_init();
+#   ifdef GC_ASSERTIONS
+      {
+	/* pthread_attr_getstacksize() stores a size_t; a narrower	*/
+	/* object here would be overrun where size_t is wider than int.	*/
+	size_t stack_size;
+	if (NULL == attr) {
+	   pthread_attr_t my_attr;
+	   pthread_attr_init(&my_attr);
+	   pthread_attr_getstacksize(&my_attr, &stack_size);
+	   /* Release whatever pthread_attr_init() set up.	*/
+	   pthread_attr_destroy(&my_attr);
+	} else {
+	   pthread_attr_getstacksize(attr, &stack_size);
+	}
+	GC_ASSERT(stack_size >= (8*HBLKSIZE*sizeof(word)));
+	/* Our threads may need to do some work for the GC.	*/
+	/* Ridiculously small threads won't work, and they	*/
+	/* probably wouldn't work anyway.			*/
+      }
+#   endif
+    if (NULL == attr) {
+	detachstate = PTHREAD_CREATE_JOINABLE;
+    } else { 
+        pthread_attr_getdetachstate(attr, &detachstate);
+    }
+    if (PTHREAD_CREATE_DETACHED == detachstate) my_flags |= DETACHED;
+    si -> flags = my_flags;
+    UNLOCK();
+#   ifdef DEBUG_THREADS
+        GC_printf1("About to start new thread from thread 0x%lX\n",
+		   pthread_self());
+#   endif
+
+    result = REAL_FUNC(pthread_create)(new_thread, attr, GC_start_routine, si);
+
+#   ifdef DEBUG_THREADS
+        GC_printf1("Started thread 0x%lX\n", *new_thread);
+#   endif
+    /* Wait until child has been added to the thread table.		*/
+    /* This also ensures that we hold onto si until the child is done	*/
+    /* with it.  Thus it doesn't matter whether it is otherwise		*/
+    /* visible to the collector.					*/
+    if (0 == result) {
+	/* Retry when the wait is merely interrupted by a signal.	*/
+	while (0 != sem_wait(&(si -> registered))) {
+            if (EINTR != errno) ABORT("sem_wait failed");
+	}
+    }
+    sem_destroy(&(si -> registered));
+    LOCK();
+    GC_INTERNAL_FREE(si);
+    UNLOCK();
+
+    return(result);
+}
+
+#ifdef GENERIC_COMPARE_AND_SWAP
+  /* Mutex that serializes the generic (lock-based) versions of	*/
+  /* GC_compare_and_exchange() and GC_atomic_add().			*/
+  pthread_mutex_t GC_compare_and_swap_lock = PTHREAD_MUTEX_INITIALIZER;
+
+  /* If *addr equals old, store new_val there and return TRUE;		*/
+  /* otherwise leave *addr alone and return FALSE.  The test and the	*/
+  /* store are performed while holding GC_compare_and_swap_lock.	*/
+  GC_bool GC_compare_and_exchange(volatile GC_word *addr,
+  			          GC_word old, GC_word new_val)
+  {
+    GC_bool swapped = FALSE;
+
+    pthread_mutex_lock(&GC_compare_and_swap_lock);
+    if (*addr == old) {
+      *addr = new_val;
+      swapped = TRUE;
+    }
+    pthread_mutex_unlock(&GC_compare_and_swap_lock);
+    return swapped;
+  }
+  
+  /* Add how_much to *addr while holding GC_compare_and_swap_lock and	*/
+  /* return the value *addr had before the addition.			*/
+  GC_word GC_atomic_add(volatile GC_word *addr, GC_word how_much)
+  {
+    GC_word previous;
+
+    pthread_mutex_lock(&GC_compare_and_swap_lock);
+    previous = *addr;
+    *addr = previous + how_much;
+    pthread_mutex_unlock(&GC_compare_and_swap_lock);
+
+    return previous;
+  }
+
+#endif /* GENERIC_COMPARE_AND_SWAP */
+/* Spend a few cycles in a way that can't introduce contention with	*/
+/* other threads.							*/
+void GC_pause()
+{
+    int i;
+#	ifndef __GNUC__
+        volatile word dummy = 0;
+#	endif
+
+    /* Ten iterations of an operation that is meant to survive		*/
+    /* optimization: an empty asm statement with a "memory" clobber	*/
+    /* under GCC, a GC_noop() call on a volatile counter otherwise.	*/
+    for (i = 0; i < 10; ++i) { 
+#     ifdef __GNUC__
+        __asm__ __volatile__ (" " : : : "memory");
+#     else
+	/* Something that's unlikely to be optimized away. */
+	GC_noop(++dummy);
+#     endif
+    }
+}
+    
+#define SPIN_MAX 1024	/* Maximum number of calls to GC_pause before	*/
+			/* we give up spinning.				*/
+
+VOLATILE GC_bool GC_collecting = 0;
+			/* A hint that we're in the collector and       */
+                        /* holding the allocation lock for an           */
+                        /* extended period.  The GC_lock() variants	*/
+			/* skip their spinning phase while it is set.	*/
+
+#if !defined(USE_SPIN_LOCK) || defined(PARALLEL_MARK)
+/* If we don't want to use the below spinlock implementation, either	*/
+/* because we don't have a GC_test_and_set implementation, or because 	*/
+/* we don't want to risk sleeping, we can still try spinning on 	*/
+/* pthread_mutex_trylock for a while.  This appears to be very		*/
+/* beneficial in many cases.						*/
+/* I suspect that under high contention this is nearly always better	*/
+/* than the spin lock.  But it's a bit slower on a uniprocessor.	*/
+/* Hence we still default to the spin lock.				*/
+/* This is also used to acquire the mark lock for the parallel		*/
+/* marker.								*/
+
+/* Here we use a strict exponential backoff scheme.  I don't know 	*/
+/* whether that's better or worse than the above.  We eventually 	*/
+/* yield by calling pthread_mutex_lock(); it never makes sense to	*/
+/* explicitly sleep.							*/
+
+/* Acquire *lock.  pthread_mutex_trylock() is attempted first and then	*/
+/* retried after pauses of 1, 2, 4, ... GC_pause() calls, for as long	*/
+/* as the pause length does not exceed SPIN_MAX; after that we block	*/
+/* in pthread_mutex_lock().  With NO_PTHREAD_TRYLOCK defined only the	*/
+/* blocking call is made.						*/
+void GC_generic_lock(pthread_mutex_t * lock)
+{
+#ifndef NO_PTHREAD_TRYLOCK
+    unsigned spins;
+    unsigned done;
+    int rc;
+
+    if (0 == pthread_mutex_trylock(lock)) return;
+    spins = 1;
+    while (spins <= SPIN_MAX) {
+	for (done = 0; done < spins; ++done) {
+	    GC_pause();
+	}
+	rc = pthread_mutex_trylock(lock);
+	if (0 == rc) return;
+	if (EBUSY != rc) {
+	    ABORT("Unexpected error from pthread_mutex_trylock");
+	}
+	/* Still busy: double the pause and try again.	*/
+	spins <<= 1;
+    }
+#endif /* !NO_PTHREAD_TRYLOCK */
+    pthread_mutex_lock(lock);
+}
+
+#endif /* !USE_SPIN_LOCK || PARALLEL_MARK */
+
+#if defined(USE_SPIN_LOCK)
+
+/* Reasonably fast spin locks.  Basically the same implementation */
+/* as STL alloc.h.  This isn't really the right way to do this,   */
+/* but until the POSIX scheduling mess gets straightened out ...  */
+
+/* The allocation lock word, acquired with GC_test_and_set().	  */
+volatile unsigned int GC_allocate_lock = 0;
+
+
+/* Acquire the allocation lock.  After a failed first attempt we  */
+/* spin for up to spin_max iterations (skipped when GC_collecting */
+/* is set or GC_nprocs is 1), and then fall back to a loop that   */
+/* yields and, after SLEEP_THRESHOLD attempts, sleeps for	  */
+/* increasing intervals.					  */
+void GC_lock()
+{
+#   define low_spin_max 30  /* spin cycles if we suspect uniprocessor */
+#   define high_spin_max SPIN_MAX /* spin cycles for multiprocessor */
+    static unsigned spin_max = low_spin_max;
+    unsigned my_spin_max;
+    static unsigned last_spins = 0;
+    unsigned my_last_spins;
+    int i;
+
+    if (!GC_test_and_set(&GC_allocate_lock)) {
+        return;
+    }
+    /* Work on private copies of the statics, which are read and	*/
+    /* updated here without any synchronization.			*/
+    my_spin_max = spin_max;
+    my_last_spins = last_spins;
+    for (i = 0; i < my_spin_max; i++) {
+        if (GC_collecting || GC_nprocs == 1) goto yield;
+	/* Just pause while we are below half of the spin count that	*/
+	/* last succeeded, or while the lock word is visibly set.	*/
+        if (i < my_last_spins/2 || GC_allocate_lock) {
+            GC_pause();
+            continue;
+        }
+        if (!GC_test_and_set(&GC_allocate_lock)) {
+	    /*
+             * got it!
+             * Spinning worked.  Thus we're probably not being scheduled
+             * against the other process with which we were contending.
+             * Thus it makes sense to spin longer the next time.
+	     */
+            last_spins = i;
+            spin_max = high_spin_max;
+            return;
+        }
+    }
+    /* We are probably being scheduled against the other process.  Sleep. */
+    spin_max = low_spin_max;
+yield:
+    for (i = 0;; ++i) {
+        if (!GC_test_and_set(&GC_allocate_lock)) {
+            return;
+        }
+#       define SLEEP_THRESHOLD 12
+		/* Under Linux very short sleeps tend to wait until	*/
+		/* the current time quantum expires.  On old Linux	*/
+		/* kernels nanosleep(<= 2ms) just spins under Linux.    */
+		/* (Under 2.4, this happens only for real-time		*/
+		/* processes.)  We want to minimize both behaviors	*/
+		/* here.						*/
+        if (i < SLEEP_THRESHOLD) {
+            sched_yield();
+	} else {
+	    struct timespec ts;
+	
+	    /* Clamping the loop counter itself caps the sleep at	*/
+	    /* 1 << 24 nanoseconds for all further iterations.		*/
+	    if (i > 24) i = 24;
+			/* Don't wait for more than about 15msecs, even	*/
+			/* under extreme contention.			*/
+	    ts.tv_sec = 0;
+	    ts.tv_nsec = 1 << i;
+	    nanosleep(&ts, 0);
+	}
+    }
+}
+
+#else  /* !USE_SPINLOCK */
+/* Acquire the allocation lock GC_allocate_ml.  Spinning through	*/
+/* GC_generic_lock() is used only when trylock is available, GC_nprocs	*/
+/* is not 1 and GC_collecting is clear; in every other case we block	*/
+/* in pthread_mutex_lock() right away.					*/
+void GC_lock()
+{
+#ifdef NO_PTHREAD_TRYLOCK
+    pthread_mutex_lock(&GC_allocate_ml);
+#else  /* !NO_PTHREAD_TRYLOCK */
+    if (GC_nprocs != 1 && !GC_collecting) {
+        GC_generic_lock(&GC_allocate_ml);
+    } else {
+	pthread_mutex_lock(&GC_allocate_ml);
+    }
+#endif /* NO_PTHREAD_TRYLOCK */
+}
+
+#endif /* !USE_SPINLOCK */
+
+#if defined(PARALLEL_MARK) || defined(THREAD_LOCAL_ALLOC)
+
+/* With assertions enabled, the thread currently holding the mark	*/
+/* lock, or NO_THREAD.  Maintained by the mark lock routines below.	*/
+#ifdef GC_ASSERTIONS
+  pthread_t GC_mark_lock_holder = NO_THREAD;
+#endif
+
+/* The mark lock.  The first variant is disabled and kept only as a	*/
+/* record of the workaround it describes.				*/
+#if 0
+  /* Ugly workaround for a linux threads bug in the final versions      */
+  /* of glibc2.1.  Pthread_mutex_trylock sets the mutex owner           */
+  /* field even when it fails to acquire the mutex.  This causes        */
+  /* pthread_cond_wait to die.  Remove for glibc2.2.                    */
+  /* According to the man page, we should use                           */
+  /* PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP, but that isn't actually   */
+  /* defined.                                                           */
+  static pthread_mutex_t mark_mutex =
+        {0, 0, 0, PTHREAD_MUTEX_ERRORCHECK_NP, {0, 0}};
+#else
+  static pthread_mutex_t mark_mutex = PTHREAD_MUTEX_INITIALIZER;
+#endif
+
+/* Condition variable waited on in GC_wait_builder() and broadcast by	*/
+/* GC_notify_all_builder(); always used together with mark_mutex.	*/
+static pthread_cond_t builder_cv = PTHREAD_COND_INITIALIZER;
+
+/* Acquire the mark lock through GC_generic_lock() and, with		*/
+/* assertions enabled, record the calling thread as its holder.		*/
+void GC_acquire_mark_lock()
+{
+/*
+    if (pthread_mutex_lock(&mark_mutex) != 0) {
+	ABORT("pthread_mutex_lock failed");
+    }
+*/
+    GC_generic_lock(&mark_mutex);
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = pthread_self();
+#   endif
+}
+
+/* Release the mark lock, which the caller must hold.  With assertions	*/
+/* enabled the recorded holder is reset to NO_THREAD first.  Aborts if	*/
+/* the unlock fails.							*/
+void GC_release_mark_lock()
+{
+    int rc;
+
+    GC_ASSERT(GC_mark_lock_holder == pthread_self());
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = NO_THREAD;
+#   endif
+    rc = pthread_mutex_unlock(&mark_mutex);
+    if (0 != rc) {
+	ABORT("pthread_mutex_unlock failed");
+    }
+}
+
+/* Collector must wait for freelist builders for 2 reasons:		*/
+/* 1) Mark bits may still be getting examined without lock.		*/
+/* 2) Partial free lists referenced only by locals may not be scanned 	*/
+/*    correctly, e.g. if they contain "pointer-free" objects, since the	*/
+/*    free-list link may be ignored.					*/
+/* Waits on builder_cv; the caller must hold the mark lock, which is	*/
+/* held again when this returns.					*/
+void GC_wait_builder()
+{
+    GC_ASSERT(GC_mark_lock_holder == pthread_self());
+    /* The wait releases mark_mutex, so clear the recorded holder	*/
+    /* for its duration and restore it afterwards.			*/
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = NO_THREAD;
+#   endif
+    if (pthread_cond_wait(&builder_cv, &mark_mutex) != 0) {
+	ABORT("pthread_cond_wait failed");
+    }
+    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = pthread_self();
+#   endif
+}
+
+/* Block until GC_fl_builder_count is no longer positive.  The mark	*/
+/* lock is taken for the duration of the wait and released again	*/
+/* before returning.							*/
+void GC_wait_for_reclaim()
+{
+    GC_acquire_mark_lock();
+    for (;;) {
+	if (!(GC_fl_builder_count > 0)) break;
+	GC_wait_builder();
+    }
+    GC_release_mark_lock();
+}
+
+/* Wake every thread waiting in GC_wait_builder().  The caller must	*/
+/* hold the mark lock.  Aborts if the broadcast fails.			*/
+void GC_notify_all_builder()
+{
+    GC_ASSERT(GC_mark_lock_holder == pthread_self());
+    if (pthread_cond_broadcast(&builder_cv) != 0) {
+	ABORT("pthread_cond_broadcast failed");
+    }
+}
+
+#endif /* PARALLEL_MARK || THREAD_LOCAL_ALLOC */
+
+#ifdef PARALLEL_MARK
+
+/* Condition variable waited on in GC_wait_marker() and broadcast by	*/
+/* GC_notify_all_marker(); used together with mark_mutex.		*/
+static pthread_cond_t mark_cv = PTHREAD_COND_INITIALIZER;
+
+/* Wait on mark_cv.  The caller must hold the mark lock, which is	*/
+/* held again when this returns.  Aborts if the wait fails.		*/
+void GC_wait_marker()
+{
+    GC_ASSERT(GC_mark_lock_holder == pthread_self());
+    /* The wait releases mark_mutex, so clear the recorded holder	*/
+    /* for its duration and restore it afterwards.			*/
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = NO_THREAD;
+#   endif
+    if (pthread_cond_wait(&mark_cv, &mark_mutex) != 0) {
+	ABORT("pthread_cond_wait failed");
+    }
+    GC_ASSERT(GC_mark_lock_holder == NO_THREAD);
+#   ifdef GC_ASSERTIONS
+	GC_mark_lock_holder = pthread_self();
+#   endif
+}
+
+/* Wake every thread waiting in GC_wait_marker().  Aborts if the	*/
+/* broadcast fails.  Unlike GC_notify_all_builder(), this does not	*/
+/* assert that the caller holds the mark lock.				*/
+void GC_notify_all_marker()
+{
+    if (pthread_cond_broadcast(&mark_cv) != 0) {
+	ABORT("pthread_cond_broadcast failed");
+    }
+}
+
+#endif /* PARALLEL_MARK */
+
+# endif /* GC_LINUX_THREADS and friends */
+
author	jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4>	2003-07-28 03:46:07 +0000
committer	jsturm <jsturm@138bc75d-0d04-0410-961f-82ee72b054a4>	2003-07-28 03:46:07 +0000
commit	715e3bd3f343537e75f28ab83f73d8c20ed7929f (patch)
tree	c698ff9b5618f81b32dbda3a94016046ef2e1a48
parent	2f61b8cae7e804b377aede07f9d06291244ff64d (diff)
download	gcc-715e3bd3f343537e75f28ab83f73d8c20ed7929f.tar.gz