gc6.2 tarball import (gc6_2)

author: Ivan Maidanski <ivmai@mail.ru> 2011-07-26 17:28:12 +0400
committer: Ivan Maidanski <ivmai@mail.ru> 2011-07-26 17:28:12 +0400
commit: 111a44f98adde07d205c92656ad9b935ca2a39a8 (patch)
tree: 8a0cb4e60f636fd09dd0d2e1a3a7f3a4ac0a1bb0 /doc
parent: f3632431e72d48bc7772b0752e29bb1e2a0901c6 (diff)
download: bdwgc-111a44f98adde07d205c92656ad9b935ca2a39a8.tar.gz
12 files changed, 1165 insertions, 83 deletions
diff --git a/doc/Makefile.am b/doc/Makefile.am
new file mode 100644
index 00000000..91446305
--- /dev/null
+++ b/doc/Makefile.am
@@ -0,0 +1,27 @@
+# 
+# 
+# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+# OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+# 
+# Permission is hereby granted to use or copy this program
+# for any purpose,  provided the above notices are retained on all copies.
+# Permission to modify the code and to distribute modified code is granted,
+# provided the above notices are retained, and a notice that the code was
+# modified is included with the above copyright notice.
+#
+# Modified by: Grzegorz Jakacki <jakacki at acm dot org>
+
+## Process this file with automake to produce Makefile.in.
+
+# installed documentation
+#
+dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \
+    gcdescr.html README README.amiga README.arm.cross \
+    README.autoconf README.changes README.contributors \
+    README.cords README.DGUX386 README.dj README.environment \
+    README.ews4800 README.hp README.linux README.Mac \
+    README.MacOSX README.macros README.OS2 README.rs6000 \
+    README.sgi README.solaris2 README.uts README.win32 \
+    tree.html leak.html gcinterface.html scale.html \
+    README.darwin
+
diff --git a/doc/Makefile.in b/doc/Makefile.in
new file mode 100644
index 00000000..9bf1ff5f
--- /dev/null
+++ b/doc/Makefile.in
@@ -0,0 +1,282 @@
+# Makefile.in generated by automake 1.6.3 from Makefile.am.
+# @configure_input@
+
+# Copyright 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002
+# Free Software Foundation, Inc.
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+# 
+# 
+# THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY EXPRESSED
+# OR IMPLIED.  ANY USE IS AT YOUR OWN RISK.
+# 
+# Permission is hereby granted to use or copy this program
+# for any purpose,  provided the above notices are retained on all copies.
+# Permission to modify the code and to distribute modified code is granted,
+# provided the above notices are retained, and a notice that the code was
+# modified is included with the above copyright notice.
+#
+# Modified by: Grzegorz Jakacki <jakacki at acm dot org>
+SHELL = @SHELL@
+
+srcdir = @srcdir@
+top_srcdir = @top_srcdir@
+VPATH = @srcdir@
+prefix = @prefix@
+exec_prefix = @exec_prefix@
+
+bindir = @bindir@
+sbindir = @sbindir@
+libexecdir = @libexecdir@
+datadir = @datadir@
+sysconfdir = @sysconfdir@
+sharedstatedir = @sharedstatedir@
+localstatedir = @localstatedir@
+libdir = @libdir@
+infodir = @infodir@
+mandir = @mandir@
+includedir = @includedir@
+oldincludedir = /usr/include
+pkgdatadir = $(datadir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+top_builddir = ..
+
+ACLOCAL = @ACLOCAL@
+AUTOCONF = @AUTOCONF@
+AUTOMAKE = @AUTOMAKE@
+AUTOHEADER = @AUTOHEADER@
+
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+INSTALL = @INSTALL@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_DATA = @INSTALL_DATA@
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = @program_transform_name@
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+host_alias = @host_alias@
+host_triplet = @host@
+
+EXEEXT = @EXEEXT@
+OBJEXT = @OBJEXT@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+AMTAR = @AMTAR@
+AR = @AR@
+AS = @AS@
+AWK = @AWK@
+CC = @CC@
+CCAS = @CCAS@
+CCASFLAGS = @CCASFLAGS@
+CFLAGS = @CFLAGS@
+CXX = @CXX@
+CXXFLAGS = @CXXFLAGS@
+CXXINCLUDES = @CXXINCLUDES@
+DEPDIR = @DEPDIR@
+DLLTOOL = @DLLTOOL@
+ECHO = @ECHO@
+EXTRA_TEST_LIBS = @EXTRA_TEST_LIBS@
+GC_CFLAGS = @GC_CFLAGS@
+GC_VERSION = @GC_VERSION@
+INCLUDES = @INCLUDES@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+LIBTOOL = @LIBTOOL@
+LN_S = @LN_S@
+MAINT = @MAINT@
+MY_CFLAGS = @MY_CFLAGS@
+OBJDUMP = @OBJDUMP@
+PACKAGE = @PACKAGE@
+RANLIB = @RANLIB@
+STRIP = @STRIP@
+THREADLIBS = @THREADLIBS@
+VERSION = @VERSION@
+addincludes = @addincludes@
+addlibs = @addlibs@
+addobjs = @addobjs@
+addtests = @addtests@
+am__include = @am__include@
+am__quote = @am__quote@
+install_sh = @install_sh@
+target_all = @target_all@
+
+# installed documentation
+#
+dist_pkgdata_DATA = barrett_diagram debugging.html gc.man \
+    gcdescr.html README README.amiga README.arm.cross \
+    README.autoconf README.changes README.contributors \
+    README.cords README.DGUX386 README.dj README.environment \
+    README.ews4800 README.hp README.linux README.Mac \
+    README.MacOSX README.macros README.OS2 README.rs6000 \
+    README.sgi README.solaris2 README.uts README.win32 \
+    tree.html leak.html gcinterface.html scale.html \
+    README.darwin
+
+subdir = doc
+mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs
+CONFIG_CLEAN_FILES =
+DIST_SOURCES =
+DATA = $(dist_pkgdata_DATA)
+
+DIST_COMMON = README $(dist_pkgdata_DATA) Makefile.am Makefile.in
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ Makefile.am  $(top_srcdir)/configure.in $(ACLOCAL_M4)
+	cd $(top_srcdir) && \
+	  $(AUTOMAKE) --gnu  doc/Makefile
+Makefile: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.in  $(top_builddir)/config.status
+	cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__depfiles_maybe)
+
+mostlyclean-libtool:
+	-rm -f *.lo
+
+clean-libtool:
+	-rm -rf .libs _libs
+
+distclean-libtool:
+	-rm -f libtool
+uninstall-info-am:
+dist_pkgdataDATA_INSTALL = $(INSTALL_DATA)
+install-dist_pkgdataDATA: $(dist_pkgdata_DATA)
+	@$(NORMAL_INSTALL)
+	$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
+	@list='$(dist_pkgdata_DATA)'; for p in $$list; do \
+	  if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f"; \
+	  $(dist_pkgdataDATA_INSTALL) $$d$$p $(DESTDIR)$(pkgdatadir)/$$f; \
+	done
+
+uninstall-dist_pkgdataDATA:
+	@$(NORMAL_UNINSTALL)
+	@list='$(dist_pkgdata_DATA)'; for p in $$list; do \
+	  f="`echo $$p | sed -e 's|^.*/||'`"; \
+	  echo " rm -f $(DESTDIR)$(pkgdatadir)/$$f"; \
+	  rm -f $(DESTDIR)$(pkgdatadir)/$$f; \
+	done
+tags: TAGS
+TAGS:
+
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+
+top_distdir = ..
+distdir = $(top_distdir)/$(PACKAGE)-$(VERSION)
+
+distdir: $(DISTFILES)
+	@list='$(DISTFILES)'; for file in $$list; do \
+	  if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+	  dir=`echo "$$file" | sed -e 's,/[^/]*$$,,'`; \
+	  if test "$$dir" != "$$file" && test "$$dir" != "."; then \
+	    dir="/$$dir"; \
+	    $(mkinstalldirs) "$(distdir)$$dir"; \
+	  else \
+	    dir=''; \
+	  fi; \
+	  if test -d $$d/$$file; then \
+	    if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+	      cp -pR $(srcdir)/$$file $(distdir)$$dir || exit 1; \
+	    fi; \
+	    cp -pR $$d/$$file $(distdir)$$dir || exit 1; \
+	  else \
+	    test -f $(distdir)/$$file \
+	    || cp -p $$d/$$file $(distdir)/$$file \
+	    || exit 1; \
+	  fi; \
+	done
+check-am: all-am
+check: check-am
+all-am: Makefile $(DATA)
+
+installdirs:
+	$(mkinstalldirs) $(DESTDIR)$(pkgdatadir)
+
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+	@$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+	$(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+	  INSTALL_STRIP_FLAG=-s \
+	  `test -z '$(STRIP)' || \
+	    echo "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'"` install
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+	-rm -f Makefile $(CONFIG_CLEAN_FILES)
+
+maintainer-clean-generic:
+	@echo "This command is intended for maintainers to use"
+	@echo "it deletes files that may require special tools to rebuild."
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+
+distclean-am: clean-am distclean-generic distclean-libtool
+
+dvi: dvi-am
+
+dvi-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-dist_pkgdataDATA
+
+install-exec-am:
+
+install-info: install-info-am
+
+install-man:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+uninstall-am: uninstall-dist_pkgdataDATA uninstall-info-am
+
+.PHONY: all all-am check check-am clean clean-generic clean-libtool \
+	distclean distclean-generic distclean-libtool distdir dvi \
+	dvi-am info info-am install install-am install-data \
+	install-data-am install-dist_pkgdataDATA install-exec \
+	install-exec-am install-info install-info-am install-man \
+	install-strip installcheck installcheck-am installdirs \
+	maintainer-clean maintainer-clean-generic mostlyclean \
+	mostlyclean-generic mostlyclean-libtool uninstall uninstall-am \
+	uninstall-dist_pkgdataDATA uninstall-info-am
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/doc/README b/doc/README
index d6dbec90..04e24a9a 100644
--- a/doc/README
+++ b/doc/README
@@ -1,7 +1,7 @@
 Copyright (c) 1988, 1989 Hans-J. Boehm, Alan J. Demers
 Copyright (c) 1991-1996 by Xerox Corporation.  All rights reserved.
 Copyright (c) 1996-1999 by Silicon Graphics.  All rights reserved.
-Copyright (c) 1999-2001 by Hewlett-Packard Company. All rights reserved.
+Copyright (c) 1999-2003 by Hewlett-Packard Company. All rights reserved.
 
 The file linux_threads.c is also
 Copyright (c) 1998 by Fergus Henderson.  All rights reserved.
@@ -28,7 +28,7 @@ are GPL'ed, but with an exception that should cover all uses in the
 collector.  (If you are concerned about such things, I recommend you look
 at the notice in config.guess or ltmain.sh.)
 
-This is version 6.1 of a conservative garbage collector for C and C++.
+This is version 6.2 of a conservative garbage collector for C and C++.
 
 You might find a more recent version of this at
 
diff --git a/doc/README.MacOSX b/doc/README.MacOSX
index 2abf0b40..f5333d51 100644
--- a/doc/README.MacOSX
+++ b/doc/README.MacOSX
@@ -1,27 +1 @@
-While the GC should work on MacOS X Server, MacOS X and Darwin, I only tested
-it on MacOS X Server.
-I've added a PPC assembly version of GC_push_regs(), thus the setjmp() hack is
-no longer necessary. Incremental collection is supported via mprotect/signal.
-The current solution isn't really optimal because the signal handler must decode
-the faulting PPC machine instruction in order to find the correct heap address.
-Further, it must poke around in the register state which the kernel saved away
-in some obscure register state structure before it calls the signal handler -
-needless to say the layout of this structure is no where documented.
-Threads and dynamic libraries are not yet supported (adding dynamic library
-support via the low-level dyld API shouldn't be that hard).
-
-The original MacOS X port was brought to you by Andrew Stone.
-
-
-June, 1 2000
-
-Dietmar Planitzer
-dave.pl@ping.at
-
-Note from Andrew Begel:
-
-One more fix to enable gc.a to link successfully into a shared library for
-MacOS X. You have to add -fno-common to the CFLAGS in the Makefile. MacOSX
-disallows common symbols in anything that eventually finds its way into a
-shared library. (I don't completely understand why, but -fno-common seems to
-work and doesn't mess up the garbage collector's functionality).
+See README.darwin for the latest Darwin/MacOSX information.
diff --git a/doc/README.changes b/doc/README.changes
index 3a49e062..1a0fcb38 100644
--- a/doc/README.changes
+++ b/doc/README.changes
@@ -1676,16 +1676,216 @@ Since 6.1alpha5:
    to work correctly with these.
  - Fixed Linux USE_PROC_FOR_LIBRARIES to work with a 64-bit /proc format.
 
+Since 6.1:
+ - Guard the test for GC_DUMP_REGULARLY in misc.c with
+   "#ifndef NO_DEBUGGING".  Otherwise it fails to build with NO_DEBUGGING
+   defined.  (Thanks to Manuel Serrano.)
+ - Message about retrying suspend signals was incorrectly generated even when
+   flag was not set.
+ - Cleaned up MACOSX/NEXT root registration code.  There was apparently a
+   separate ifdef case in GC_register_data_segments() for no reason.
+ - Removed MPROTECT_VDB for MACOSX port, based on one negative report.
+ - Arrange for gc.h and friends to be correctly installed with GNU-style
+   "make install".
+ - Enable the GNU-style build facility to include C++ support in the library
+   with --enable-cplusplus. (Thanks to Thomas Maier for some of the patch.)
+ - Mark from GC_thread_key in linux_threads.c, in case that's allocated
+   from the garbage collected heap, as it is with our own thread-specific
+   storage implementation.  (Thanks to Jeff Sturm.)
+ - Mark all free list header blocks if they are heap allocated.  This avoids
+   some unnecessary tracing.  And it remains correct if we clear the
+   root set. (Thanks to Jeff Sturm for identifying the bug.)
+ - Improved S390/Linux support.  Add S390/Linux 64-bit support.  (Thanks
+   to Ulrich Weigand.)
+ - Corrected the spelling of GC_{M,C}ALLOC_EXPLICTLY_TYPED to
+   GC_{M,C}ALLOC_EXPLICITLY_TYPED in gc_typed.h.  This is technically
+   an interface change.  Based on the fact that nobody reported this,
+   I suspect/hope there were no clients.
+ - Cleaned up gc_typed.h so that (1) it adds an extern "C" declaration
+   when appropriate, (2) doesn't generate references to undefined internal
+   macros, and (3) allows easier manual construction of descriptors.
+ - Close the file descriptor used by GC_print_address_map().
+ - Set the "close-on-exec" bit for various file descriptors maintained
+   for the collector's internal use.
+ - Added a hack to find memory segments owned by the system allocator
+   under win32.  Based on my tests, this tends to eventually find all
+   segments, though it may take a while.  There appear to be cleaner,
+   but slower solutions under NT/XP.  But they rely on an API that's
+   unsupported under 9X.
+ - Changed Linux PowerPC stack finding to LINUX_STACKBOTTOM.  (Thanks
+   to Akira Tagoh for pointing out that HEURISTIC1 doesn't work on
+   64-bit kernels.)
+ - Added GC_set_free_space_divisor to avoid some Windows dll issues.
+ - Added FIXUP_POINTER, POINTER_SHIFT, POINTER_MASK to allow preprocessing
+   of candidate pointers for tagging, etc.
+ - Always lock around GC_notify_full_gc().  Simplified code for
+   invoking GC_notify_full_gc().
+ - Changed the way DATASTART is defined on FreeBSD to be robust against
+   an unmapped page after etext.  (Thanks to Hironori Sakamoto for
+   tracking down the intermittent failure.)
+ - Made GC_enable() and GC_disable() official.  Deprecated direct update
+   of GC_dont_gc.  Changed GC_gcollect to be a noop when garbage collection
+   is disabled.
+ - Call GC_register_dynamic_libraries before stopping the world on Linux,
+   in order to avoid a potential deadlock due to the dl_iterate_phdr lock.
+ - Introduced a more general mechanism for platform-dependent code to
+   decide whether the main data segment should be handled separately
+   from dynamic libraries, or registered by GC_register_dynamic_libraries.
+   The latter is more reliable and easier on Linux with dl_iterate_phdr. 
+
+Since 6.2alpha1:
+ - Fixed the completely broken FreeBSD code in 6.2alpha1.  (Thanks to
+   Hironori Sakamoto for the patch.)
+ - Changed IRIX reference in dbg_mlc.c to IRIX5. (Thanks to Marcus Herbert.)
+ - Attempted to work around the problems with .S filenames and the SGI
+   compiler.  (Reported by several people. Untested.)
+ - Worked around an HP/UX make issue with the GNU-style build process.
+ - Fixed the --enable-cplusplus build machinery to allow builds without
+   a C++ compiler.  (That was always the intent ...)
+ - Changed the debugging allocation macros to explicitly pass the return
+   address for Linux and XXXBSD on hardware for which we can't get stack
+   traces.  Use __builtin_return_address(0) to generate it when possible.
+   Some of the configuration work was cleaned up (good) and moved to gc.h
+   (bad, but necessary).  This should make leak detection more useful
+   on a number of platforms.  (Thanks to Fabian Thylman for the suggestion.)
+ - Fixed compilation problems in dbg_mlc.c with GC_ADD_CALLER.
+ - Bumped revision number for dynamic library.
+
+Since 6.2alpha2:
+ - Don't include execinfo.h in os_dep.c when it's not needed, and may not exist.
+
+Since 6.2alpha3:
+ - Use LINUX_STACKBOTTOM for >= glibc2.2 on Linux/MIPS.  (See Debian bug
+   # 177204)
+ - Integrated Jeff Sturm and Jesse Rosenstock's MACOSX threads patches.
+ - Integrated Grzegorz Jakacki's substantial GNU build patch.  "Make dist"
+   should now work for the GNU build process.  Documentation files
+   are installed under share/gc.
+ - Tweaked gc_cpp.h to again support the Borland compiler.  (Thanks to
+   Rene Girard for pointing out the problems.)
+ - Updated BCC_MAKEFILE (thanks to Rene Girard).
+ - Added GC_ASSERT check for minimum thread stack size.
+ - Added --enable-gc-assertions.
+ - Added some web documentation to the distribution.  Updated it in the
+   process.
+ - Separate gc_config_macros.h from gc.h.
+ - Added generic GC_THREADS client-defined macro to set the appropriate
+   GC_XXX_THREADS internal macro.  (gc_config_macros.h.)
+ - Add debugging versions of _ignore_off_page allocation primitives.
+ - Moved declarations of GC_make_closure and GC_debug_invoke_finalizer
+   from gc.h to gc_priv.h.
+ - Reset GC_fail_count even if only a small allocation succeeds.
+ - Integrated Brian Alliet's patch for dynamic library support on Darwin.
+ - gc_cpp.h's gc_cleanup destructor called GC_REGISTER_FINALIZER_IGNORE_SELF
+   when it should have called the lower case version, since it was
+   explicitly computing a base pointer.
+
+Since 6.2alpha4:
+ - GC_invoke_finalizers could, under rare conditions, set
+   GC_finalizer_mem_freed to an essentially random value.  This could
+   possibly cause unbounded heap growth for long-running applications
+   under some conditions.  (The bug was introduced in 6.1alpha5, and
+   is not in gcc3.3.  Thanks to Ben Hutchings for finding it.)
+ - Attempted to sanitize the various DLL macros.  GC_USE_DLL disappeared.
+   GC_DLL is used instead.  All internal tests are now on GC_DLL.
+   README.macros is now more precise about the intended meaning.
+ - Include DllMain in the multithreaded win32 version only if the
+   collector is actually built as a dll.  (Thanks to Mohan Embar for
+   a version of the patch.)
+ - Hide the cygwin threadAttach/Detach functions.  They were violating our
+   namespace rules.  
+ - Fixed an assertion in GC_check_heap_proc.  Added GC_STATIC_ASSERT.
+   (Thanks again to Ben Hutchings.)
+ - Removed some obsolete definitions for Linux/PowerPC in gcconfig.h.
+ - CORD_cat was not rebalancing unbalanced trees in some cases, violating
+   a CORD invariant.  Also tweaked the rebalancing rule for
+   CORD_cat_char_star.  (Thanks to Alexandr Petrosian for the bug report
+   and patch.)
+ - Added hand-coded structured exception handling support to mark.c.
+   This should enable support of dynamic libraries under win32 with
+   gcc-compiled code.  (Thanks to Ranjit Mathew for the patch.)
+   Turned on dynamic library scanning for win32/gcc.
+ - Removed some remnants of read wrapping.  (Thanks to Kenneth Schalk.)
+   GC_USE_LD_WRAP was probably broken in recent versions.
+ - The build could fail on some platforms since gcconfig.h could include
+   declarations mentioning ptr_t, which was not defined, e.g. when if_mach
+   was built.  (Thanks to Yann Dirson for pointing this out.)  Also
+   cleaned up tests for GC_PRIVATE_H in gcconfig.h a bit. 
+ - The GC_LOOP_ON_ABORT environment variable interfered with incremental
+   collection, since the write fault handler was erroneously overridden.
+   Handlers are now set up in the correct order.
+ - It used to be possible to call GC_mark_thread_local_free_lists() while
+   the world was not stopped during an incremental GC.  This was not safe.
+   Fortunately, it was also unnecessary.  Added GC_world_stopped flag
+   to avoid it.  (This caused occasional crashes in GC_set_fl_marks
+   with thread local allocation and incremental GC.  This probably happened
+   primarily on old, slow multiprocessors.)
+ - Allowed overriding of MAX_THREADS in win32_threads.c from the build
+   command line.  (Patch from Yannis Bres.)
+ - Taught the IA64/linux code to determine the register backing store base from
+   /proc/self/maps after checking the __libc symbol, but before guessing.
+   (__libc symbols are on the endangered list, and the guess is likely to not
+   always be right for 2.6 kernels.)  Restructured the code to read and parse
+   /proc/self/maps so it only exists in one place (all platforms).
+ - The -DUSE_PROC_FOR_LIBRARIES code was broken on Linux.  It claimed that it
+   also registered the main data segment, but didn't actually do so.  (I don't
+   think anyone actually uses this configuration, but ...)
+ - Made another attempt to get --enable-cplusplus to do the right thing.
+   Since there are unavoidable problems with C programs linking against a
+   dynamic library that includes C++ code, I separated out the c++ code into
+   libgccpp.
+
+Since 6.2alpha5:
+ - There was an extra underscore in the name of GC_save_registers_in_stack
+   for NetBSD/SPARC.  (Thanks to Jaap Boender for the patch.)
+ - Integrated Brian Alliet's patch for Darwin.  This restructured the
+   linuxthreads/pthreads support to separate generic pthreads support
+   from the more system-dependent thread-stopping code.  I believe this
+   should make it easier to eliminate the code duplication between
+   pthreads platforms in the future.  The patch included some other
+   code cleanups.
+ - Integrated Dan Bonachea's patch to support AIX threads.  This required
+   substantial manual integration, mostly due to conflicts with other
+   recent threads changes.  It may take another iteration to
+   get it to work.
+ - Removed HPUX/PA-RISC support from aix_irix_threads.c.  It wasn't used
+   anyway and it cluttered up the code.  And anything we can do to migrate
+   towards generic pthreads support is a good thing.
+ - Added a more explicit test for tracing of function arguments to test.c.
+   (Thanks to Dan Grayson.)
+ - Added Akira Tagoh's PowerPC64 patch.
+ - Fixed some bit rot in the Cygwin port.  (Thanks to Dan Bonachea for
+   pointing it out.)  Gc.h now includes just windows.h, not winbase.h.
+ - Declared GC_save_regs_in_stack() in gc_priv.h.  Remove other declarations.
+ - Changed --enable-cplusplus to use automake conditionals.  The old way
+   confused libtool.  "Make install" didn't work correctly for the old version.
+   Previously --enable-cplusplus was broken on cygwin.
+ - Changed the C version of GC_push_regs to fail at compile time if it is
+   generated with an empty body.  This seems to have been the cause of one
+   or two subtle failures on unusual platforms.  Those failures should
+   now occur at build time and be easily fixable.
+
+Since 6.2alpha6:
+ - Integrated a second round of Irix/AIX patches from Dan Bonachea.
+   Renamed mips_sgi_mach_dep.S back to mips_sgi_mach_dep.s, since it requires
+   the Irix assembler to do the C preprocessing; gcc -E doesn't work.
+ - Fixed Makefile.direct for DARWIN.  (Thanks to Manuel Serrano.)
+ - There was a race between GC_pthread_detach and thread exit that could
+   result in a thread structure being deallocated by GC_pthread_detach
+   even though it was still needed by the thread exit code.  (Thanks to
+   Dick Porter for the small test case that allowed this to be debugged.)
+ - Fixed version parsing for non-alpha versions in acinclude.m4 and
+   version checking in version.h.
+    
+
 To do:
+ - A dynamic libgc.so references dlopen unconditionally, but doesn't link
+   against libdl.
+ - GC_proc_fd for Solaris is not correctly updated in response to a
+   fork() call.  Thus incremental collection in the child won't work
+   correctly.  (Thanks to Ben Cottrell for pointing this out.)
  - --enable-redirect-malloc is mostly untested and known not to work
    on some platforms. 
- - The win32 collector ends up tracing some (most?) objects allocated with
-   the system allocator, in spite if the fact that it tries not to.
-   This costs time and space, though it remains correct.
-   We need a way to identify memory regions used by the system malloc(),
-   or an alternate way to locate dll data areas.  A very partial
-   workaround is to use GC_malloc_atomic_uncollectable() instead of
-   the system malloc() for most allocation.
  - There seem to be outstanding issues on Solaris/X86, possibly with
    finding the data segment starting address.  Information/patches would
    be appreciated.
diff --git a/doc/README.darwin b/doc/README.darwin
new file mode 100644
index 00000000..5bca9e18
--- /dev/null
+++ b/doc/README.darwin
@@ -0,0 +1,86 @@
+Darwin/MacOSX Support - May 20, 2003
+====================================
+
+Important Usage Notes
+=====================
+
+GC_init() MUST be called before calling any other GC functions. This 
+is necessary to properly register segments in dynamic libraries. This
+call is required even if your code does not use dynamic libraries as the
+dyld code handles registering all data segments.
+
+The incremental collector is still a bit flaky on darwin. It seems to 
+work reliably with workarounds for a few possible bugs in place; however,
+these workarounds may not work correctly in all cases. There may also
+be additional problems that I have not found. 
+
+Implementation Information
+==========================
+Darwin/MacOSX support is nearly complete. Thread support is reliable on 
+Darwin 6.x (MacOSX 10.2) and there have been reports of success on older
+Darwin versions (MacOSX 10.1). Shared library support has also been
+added and the gc can be run from a shared library. There is currently only
+support for Darwin/PPC although adding x86 support should be trivial.
+
+Thread support is implemented in terms of mach thread_suspend and 
+thread_resume calls. These provide a very clean interface to thread
+suspension. This implementation doesn't rely on pthread_kill so the
+code works on Darwin < 6.0 (MacOSX 10.1). All the code to stop the
+world is located in darwin_stop_world.c.
+
+The original incremental collector support unfortunately no longer works
+on recent Darwin versions. It also relied on some undocumented kernel
+structures. Mach, however, does have a very clean interface to exception
+handling. The current implementation uses Mach's exception handling. 
+
+Many thanks go to Andrew Stone, Dietmar Planitzer, Andrew Begel, 
+Jeff Sturm, and Jesse Rosenstock for all their work on the 
+Darwin/OS X port.
+
+-Brian Alliet
+brian@brianweb.net
+
+
+Older Information (Most of this no longer applies to the current code)
+======================================================================
+
+While the GC should work on MacOS X Server, MacOS X and Darwin, I only tested
+it on MacOS X Server.
+I've added a PPC assembly version of GC_push_regs(), thus the setjmp() hack is
+no longer necessary. Incremental collection is supported via mprotect/signal.
+The current solution isn't really optimal because the signal handler must decode
+the faulting PPC machine instruction in order to find the correct heap address.
+Further, it must poke around in the register state which the kernel saved away
+in some obscure register state structure before it calls the signal handler -
+needless to say the layout of this structure is no where documented.
+Threads and dynamic libraries are not yet supported (adding dynamic library
+support via the low-level dyld API shouldn't be that hard).
+
+The original MacOS X port was brought to you by Andrew Stone.
+
+
+June, 1 2000
+
+Dietmar Planitzer
+dave.pl@ping.at
+
+Note from Andrew Begel:
+
+One more fix to enable gc.a to link successfully into a shared library for
+MacOS X. You have to add -fno-common to the CFLAGS in the Makefile. MacOSX
+disallows common symbols in anything that eventually finds its way into a
+shared library. (I don't completely understand why, but -fno-common seems to
+work and doesn't mess up the garbage collector's functionality).
+
+Feb 26, 2003
+
+Jeff Sturm and Jesse Rosenstock provided a patch that adds thread support.
+GC_MACOSX_THREADS should be defined in the build and in clients.  Real
+dynamic library support is still missing, i.e. dynamic library data segments
+are still not scanned.  Code that stores pointers to the garbage collected
+heap in statically allocated variables should not reside in a dynamic
+library.  This still doesn't appear to be 100% reliable.  
+
+Mar 10, 2003
+Brian Alliet contributed dynamic library support for MacOSX.  It could also
+use more testing.
diff --git a/doc/README.macros b/doc/README.macros
index d9df8dd3..b5fe6796 100644
--- a/doc/README.macros
+++ b/doc/README.macros
@@ -51,7 +51,18 @@ _DLL		Defined by Visual C++ if dynamic libraries are being built
 		__declspec(dllexport) needs to be added to declarations
 		to support the case in which the collector is in a dll.
 
-GC_DLL		User-settable macro that forces the effect of _DLL.
+GC_DLL		User-settable macro that forces the effect of _DLL.  Set
+		by gc.h if _DLL is defined and GC_NOT_DLL is undefined.
+		This is the macro that is tested internally to determine
+		whether the GC is in its own dynamic library.  May need
+		to be set by clients before including gc.h.  Note that
+		inside the GC implementation it indicates that the
+		collector is in its own dynamic library, should export
+		its symbols, etc.  But in clients it indicates that the
+		GC resides in a different DLL, its entry points should
+		be referenced accordingly, and precautions may need to
+		be taken to properly deal with statically allocated 
+		variables in the main program.  Used only for MS Windows.
 
 GC_NOT_DLL	User-settable macro that overrides _DLL, e.g. if dynamic
 		libraries are used, but the collector is in a static library.
diff --git a/doc/README.win32 b/doc/README.win32
index dcccec3a..a40b375f 100644
--- a/doc/README.win32
+++ b/doc/README.win32
@@ -25,7 +25,7 @@ Win32 applications compiled with some flavor of gcc currently behave
 like win32s applications, in that dynamic library data segments are
 not scanned.  (Gcc does not directly support Microsoft's "structured
 exception handling".  It turns out that use of this feature is
-unavoidable if you scan arbirtray memory segments obtained from
+unavoidable if you scan arbitrary memory segments obtained from
 VirtualQuery.)
 
 The collector test program "gctest" is linked as a GUI application,
@@ -65,6 +65,12 @@ MAKEFILE.  (Make sure that the CPU environment variable is defined
 to be i386.)  In order to use the gc_cpp.h C++ interface, all
 client code should include gc_cpp.h.
 
+If you would prefer a VC++.NET project file, ask boehm@acm.org.  One has
+been contributed, but it seems to contain some absolute paths etc., so
+it can presumably only be a starting point, and is not in the standard
+distribution.  It is unclear (to me, Hans Boehm) whether it is feasible to
+change that.
+
 Clients may need to define GC_NOT_DLL before including gc.h, if the
 collector was built as a static library (as it normally is in the
 absence of thread support).
@@ -155,7 +161,7 @@ To compile the collector and testing programs use the command:
 All programs using gc should be compiled with 4-byte alignment.
 For further explanations on this see comments about Borland.
 
-If gc compiled as dll, the macro ``GC_DLL'' should be defined before
+If the gc is compiled as a dll, the macro ``GC_DLL'' should be defined before
 including "gc.h" (for example, with -DGC_DLL compiler option). It's
 important, otherwise resulting programs will not run.
 
diff --git a/doc/gcdescr.html b/doc/gcdescr.html
index 65e8a8f6..8ecbac8d 100644
--- a/doc/gcdescr.html
+++ b/doc/gcdescr.html
@@ -1,7 +1,7 @@
 <HTML>
 <HEAD>
     <TITLE> Conservative GC Algorithmic Overview </TITLE>
-    <AUTHOR> Hans-J. Boehm, Silicon Graphics</author>
+    <AUTHOR> Hans-J. Boehm, HP Labs (Much of this was written at SGI)</author>
 </HEAD>
 <BODY>
 <H1> <I>This is under construction</i> </h1>
@@ -96,20 +96,24 @@ typically on the order of the page size.
 <P>
 Large block sizes are rounded up to
 the next multiple of <TT>HBLKSIZE</tt> and then allocated by
-<TT>GC_allochblk</tt>.  This uses roughly what Paul Wilson has termed
-a "next fit" algorithm, i.e. first-fit with a rotating pointer.
-The implementation does check for a better fitting immediately
-adjacent block, which gives it somewhat better fragmentation characteristics.
-I'm now convinced it should use a best fit algorithm.  The actual
+<TT>GC_allochblk</tt>.  Recent versions of the collector
+use an approximate best fit algorithm by keeping free lists for
+several large block sizes.
+The actual
 implementation of <TT>GC_allochblk</tt>
 is significantly complicated by black-listing issues
 (see below).
 <P>
-Small blocks are allocated in blocks of size <TT>HBLKSIZE</tt>.
-Each block is
+Small blocks are allocated in chunks of size <TT>HBLKSIZE</tt>.
+Each chunk is
 dedicated to only one object size and kind.  The allocator maintains
 separate free lists for each size and kind of object.
 <P>
+Once a large block is split for use in smaller objects, it can only
+be used for objects of that size, unless the collector discovers a completely
+empty chunk.  Completely empty chunks are restored to the appropriate
+large block free list.
+<P>
 In order to avoid allocating blocks for too many distinct object sizes,
 the collector normally does not directly allocate objects of every possible
 request size.  Instead request are rounded up to one of a smaller number
@@ -139,27 +143,35 @@ expand the heap.  Otherwise, we initiate a garbage collection.  This ensures
 that the amount of garbage collection work per allocated byte remains
 constant.
 <P>
-The above is in fat an oversimplification of the real heap expansion
-heuristic, which adjusts slightly for root size and certain kinds of
-fragmentation.  In particular, programs with a large root set size and
+The above is in fact an oversimplification of the real heap expansion
+and GC triggering heuristic, which adjusts slightly for root size
+and certain kinds of
+fragmentation.  In particular:
+<UL>
+<LI> Programs with a large root set size and
 little live heap memory will expand the heap to amortize the cost of
-scanning the roots.
-<P>
-Versions 5.x of the collector actually collect more frequently in
+scanning the roots.  
+<LI> Versions 5.x of the collector actually collect more frequently in
 nonincremental mode.  The large block allocator usually refuses to split
 large heap blocks once the garbage collection threshold is
 reached.  This often has the effect of collecting well before the
 heap fills up, thus reducing fragmentation and working set size at the
-expense of GC time.  6.x will chose an intermediate strategy depending
+expense of GC time.  Versions 6.x choose an intermediate strategy depending
 on how much large object allocation has taken place in the past.
 (If the collector is configured to unmap unused pages, versions 6.x
-will use the 5.x strategy.)
-<P>
-(It has been suggested that this should be adjusted so that we favor
+use the 5.x strategy.)
+<LI> In calculating the amount of allocation since the last collection we
+give partial credit for objects we expect to be explicitly deallocated.
+Even if all objects are explicitly managed, it is often desirable to collect
+on rare occasion, since that is our only mechanism for coalescing completely
+empty chunks.
+</ul>
+<P>
+It has been suggested that this should be adjusted so that we favor
 expansion if the resulting heap still fits into physical memory.
 In many cases, that would no doubt help.  But it is tricky to do this
 in a way that remains robust if multiple application are contending
-for a single pool of physical memory.)
+for a single pool of physical memory.
 
 <H2>Mark phase</h2>
 
@@ -204,7 +216,7 @@ changes to
 <LI> <TT>MS_NONE</tt> indicating that reachable objects are marked.
 </ol>
 
-The core mark routine <TT>GC_mark_from_mark_stack</tt>, is called
+The core mark routine <TT>GC_mark_from</tt>, is called
 repeatedly by several of the sub-phases when the mark stack starts to fill
 up.  It is also called repeatedly in <TT>MS_ROOTS_PUSHED</tt> state
 to empty the mark stack.
@@ -213,6 +225,12 @@ each call, so that it can also be used by the incremental collector.
 It is fairly carefully tuned, since it usually consumes a large majority
 of the garbage collection time.
 <P>
+The fact that it performs only a small amount of work per call also
+allows it to be used as the core routine of the parallel marker.  In that
+case it is normally invoked on thread-private mark stacks instead of the
+global mark stack.  More details can be found in
+<A HREF="scale.html">scale.html</a>.
+<P>
 The marker correctly handles mark stack overflows.  Whenever the mark stack
 overflows, the mark state is reset to <TT>MS_INVALID</tt>.
 Since there are already marked objects in the heap,
@@ -281,7 +299,8 @@ Unmarked large objects are immediately returned to the large object free list.
 Each small object page is checked to see if all mark bits are clear.
 If so, the entire page is returned to the large object free list.
 Small object pages containing some reachable object are queued for later
-sweeping.
+sweeping, unless we determine that the page contains very little free
+space, in which case it is not examined further.
 <P>
 This initial sweep pass touches only block headers, not
 the blocks themselves.  Thus it does not require significant paging, even
@@ -341,12 +360,16 @@ object itself becomes marked, we have uncovered
 a cycle involving the object.  This usually results in a warning from the
 collector.  Such objects are not finalized, since it may be
 unsafe to do so.  See the more detailed
-<A HREF="finalization.html"> discussion of finalization semantics</a>.
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html"> discussion of finalization semantics</a>.
 <P>
 Any objects remaining unmarked at the end of this process are added to
 a queue of objects whose finalizers can be run.  Depending on collector
 configuration, finalizers are dequeued and run either implicitly during
 allocation calls, or explicitly in response to a user request.
+(Note that the former is unfortunately both the default and not generally safe.
+If finalizers perform synchronization, it may result in deadlocks.
+Nontrivial finalizers generally need to perform synchronization, and
+thus require a different collector configuration.)
 <P>
 The collector provides a mechanism for replacing the procedure that is
 used to mark through objects.  This is used both to provide support for
@@ -354,13 +377,14 @@ Java-style unordered finalization, and to ignore certain kinds of cycles,
 <I>e.g.</i> those arising from C++ implementations of virtual inheritance.
 
 <H2>Generational Collection and Dirty Bits</h2>
-We basically use the parallel and generational GC algorithm described in
-<A HREF="papers/pldi91.ps.gz">"Mostly Parallel Garbage Collection"</a>,
+We basically use the concurrent and generational GC algorithm described in
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/papers/pldi91.ps.Z">"Mostly Parallel Garbage Collection"</a>,
 by Boehm, Demers, and Shenker.
 <P>
 The most significant modification is that
-the collector always runs in the allocating thread.
-There is no separate garbage collector thread.
+the collector always starts running in the allocating thread.
+There is no separate garbage collector thread.  (If parallel GC is
+enabled, helper threads may also be woken up.)
 If an allocation attempt either requests a large object, or encounters
 an empty small object free list, and notices that there is a collection
 in progress, it immediately performs a small amount of marking work
@@ -389,50 +413,108 @@ cannot be satisfied from small object free lists. When marking completes,
 the set of modified pages is retrieved, and we mark once again from
 marked objects on those pages, this time with the mutator stopped.
 <P>
-We keep track of modified pages using one of three distinct mechanisms:
+We keep track of modified pages using one of several distinct mechanisms:
 <OL>
 <LI>
 Through explicit mutator cooperation.  Currently this requires
-the use of <TT>GC_malloc_stubborn</tt>.
+the use of <TT>GC_malloc_stubborn</tt>, and is rarely used.
 <LI>
-By write-protecting physical pages and catching write faults.  This is
+(<TT>MPROTECT_VDB</tt>) By write-protecting physical pages and
+catching write faults.  This is
 implemented for many Unix-like systems and for win32.  It is not possible
 in a few environments.
 <LI>
-By retrieving dirty bit information from /proc.  (Currently only Sun's
+(<TT>PROC_VDB</tt>) By retrieving dirty bit information from /proc.
+(Currently only Sun's
 Solaris supports this.  Though this is considerably cleaner, performance
 may actually be better with mprotect and signals.)
+<LI>
+(<TT>PCR_VDB</tt>) By relying on an external dirty bit implementation, in this
+case the one in Xerox PCR.
+<LI>
+(<TT>DEFAULT_VDB</tt>) By treating all pages as dirty.  This is the default if 
+none of the other techniques is known to be usable, and
+<TT>GC_malloc_stubborn</tt> is not used.  Practical only for testing, or if
+the vast majority of objects use <TT>GC_malloc_stubborn</tt>.
 </ol>
 
+<H2>Black-listing</h2>
+
+The collector implements <I>black-listing</i> of pages, as described
+in
+<A HREF="http://www.acm.org/pubs/citations/proceedings/pldi/155090/p197-boehm/">
+Boehm, ``Space Efficient Conservative Collection'', PLDI '93</a>, also available
+<A HREF="papers/pldi93.ps.Z">here</a>.
+<P>
+During the mark phase, the collector tracks ``near misses'', i.e. attempts
+to follow a ``pointer'' to just outside the garbage-collected heap, or
+to a currently unallocated page inside the heap.  Pages that have been
+the targets of such near misses are likely to be the targets of
+misidentified ``pointers'' in the future.  To minimize the future
+damage caused by such misidentifications they will be allocated only to
+small pointerfree objects. 
+<P>
+The collector understands two different kinds of black-listing.  A
+page may be black-listed for interior pointer references
+(<TT>GC_add_to_black_list_stack</tt>), if it was the target of a near
+miss from a location that requires interior pointer recognition,
+<I>e.g.</i> the stack, or the heap if <TT>GC_all_interior_pointers</tt>
+is set.  In this case, we also avoid allocating large blocks that include
+this page.
+<P>
+If the near miss came from a source that did not require interior
+pointer recognition, it is black-listed with
+<TT>GC_add_to_black_list_normal</tt>.
+A page black-listed in this way may appear inside a large object,
+so long as it is not the first page of a large object.
+<P>
+The <TT>GC_allochblk</tt> routine respects black-listing when assigning
+a block to a particular object kind and size.  It occasionally
+drops (i.e. allocates and forgets) blocks that are completely black-listed
+in order to avoid excessively long large block free lists containing
+only unusable blocks.  This would otherwise become an issue
+if there is low demand for small pointerfree objects.
+
 <H2>Thread support</h2>
 We support several different threading models.  Unfortunately Pthreads,
 the only reasonably well standardized thread model, supports too narrow
 an interface for conservative garbage collection.  There appears to be
-no portable way to allow the collector to coexist with various Pthreads
+no completely portable way to allow the collector to coexist with various Pthreads
 implementations.  Hence we currently support only a few of the more
 common Pthreads implementations.
 <P>
 In particular, it is very difficult for the collector to stop all other
 threads in the system and examine the register contents.  This is currently
-accomplished with very different mechanisms for different Pthreads
+accomplished with very different mechanisms for some Pthreads
 implementations.  The Solaris implementation temporarily disables much
 of the user-level threads implementation by stopping kernel-level threads
-("lwp"s).  The Irix implementation sends signals to individual Pthreads
-and has them wait in the signal handler.  The Linux implementation
-is similar in spirit to the Irix one.
+("lwp"s).  The Linux/HPUX/OSF1 and Irix implementations send signals to
+individual Pthreads and have them wait in the signal handler.
 <P>
-The Irix implementation uses
-only documented Pthreads calls, but relies on extensions to their semantics,
-notably the use of mutexes and condition variables from signal
-handlers.  The Linux implementation should be far closer to
-portable, though impirically it is not completely portable.
+The Linux and Irix implementations use
+only documented Pthreads calls, but rely on extensions to their semantics.
+The Linux implementation <TT>linux_threads.c</tt> relies on only very
+mild extensions to the pthreads semantics, and already supports a large number
+of other Unix-like pthreads implementations.  Our goal is to make this the
+only pthread support in the collector.
+<P>
+(The Irix implementation is separate only for historical reasons and should
+clearly be merged.  The current Solaris implementation probably performs
+better in the uniprocessor case, but does not support thread operations in the
+collector.  Hence it cannot support the parallel marker.)
 <P>
 All implementations must
 intercept thread creation and a few other thread-specific calls to allow
 enumeration of threads and location of thread stacks.  This is current
-accomplished with <TT># define</tt>'s in <TT>gc.h</tt>, or optionally
+accomplished with <TT># define</tt>'s in <TT>gc.h</tt>
+(really <TT>gc_pthread_redirects.h</tt>), or optionally
 by using ld's function call wrapping mechanism under Linux.
 <P>
 Comments are appreciated.  Please send mail to
-<A HREF="mailto:boehm@acm.org"><TT>boehm@acm.org</tt></a>
+<A HREF="mailto:boehm@acm.org"><TT>boehm@acm.org</tt></a> or
+<A HREF="mailto:Hans.Boehm@hp.com"><TT>Hans.Boehm@hp.com</tt></a>
+<P>
+This is a modified copy of a page written while the author was at SGI.
+The original was <A HREF="http://reality.sgi.com/boehm/gcdescr.html">here</a>.
 </body>
+</html>
diff --git a/doc/gcinterface.html b/doc/gcinterface.html
new file mode 100644
index 00000000..7b336ec8
--- /dev/null
+++ b/doc/gcinterface.html
@@ -0,0 +1,203 @@
+<!DOCTYPE HTML>
+<HEAD>
+<TITLE>Garbage Collector Interface</TITLE>
+</HEAD>
+<BODY>
+<H1>C Interface</h1>
+On many platforms, a single-threaded garbage collector library can be built
+to act as a plug-in malloc replacement.  (Build with -DREDIRECT_MALLOC=GC_malloc
+-DIGNORE_FREE.)  This is often the best way to deal with third-party libraries
+which leak or prematurely free objects.  -DREDIRECT_MALLOC is intended
+primarily as an easy way to adapt old code, not for new development.
+<P>
+New code should use the interface discussed below.
+<P>
+Code must be linked against the GC library.  On most UNIX platforms,
+this will be gc.a.
+<P>
+The following describes the standard C interface to the garbage collector.
+It is not a complete definition of the interface.  It describes only the
+most commonly used functionality, approximately in decreasing order of
+frequency of use.  The description assumes an ANSI C compiler.
+The full interface is described in
+<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a>
+or <TT>gc.h</tt> in the distribution.
+<P>
+Clients should include gc.h.
+<P>
+In the case of multithreaded code,
+gc.h should be included after the threads header file, and
+after defining the appropriate GC_XXXX_THREADS macro.
+(For 6.2alpha4 and later, simply defining GC_THREADS should suffice.)
+Gc.h must be included
+in files that use either GC or threads primitives, since threads primitives
+will be redefined to cooperate with the GC on many platforms.
+<DL>
+<DT> <B>void * GC_MALLOC(size_t <I>nbytes</i>)</b>
+<DD>
+Allocates and clears <I>nbytes</i> of storage.
+Requires (amortized) time proportional to <I>nbytes</i>.
+The resulting object will be automatically deallocated when unreferenced.
+References from objects allocated with the system malloc are usually not
+considered by the collector.  (See GC_MALLOC_UNCOLLECTABLE, however.)
+GC_MALLOC is a macro which invokes GC_malloc by default or, if GC_DEBUG
+is defined before gc.h is included, a debugging version that checks
+occasionally for overwrite errors, and the like.
+<DT> <B>void * GC_MALLOC_ATOMIC(size_t <I>nbytes</i>)</b>
+<DD>
+Allocates <I>nbytes</i> of storage.
+Requires (amortized) time proportional to <I>nbytes</i>.
+The resulting object will be automatically deallocated when unreferenced.
+The client promises that the resulting object will never contain any pointers.
+The memory is not cleared.
+This is the preferred way to allocate strings, floating point arrays,
+bitmaps, etc.
+More precise information about pointer locations can be communicated to the
+collector using the interface in
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_typedh.txt">gc_typed.h</a> in the distribution.
+<DT> <B>void * GC_MALLOC_UNCOLLECTABLE(size_t <I>nbytes</i>)</b>
+<DD>
+Identical to GC_MALLOC, except that the resulting object is not automatically
+deallocated.  Unlike the system-provided malloc, the collector does
+scan the object for pointers to garbage-collectable memory, even if the
+block itself does not appear to be reachable.  (Objects allocated in this way
+are effectively treated as roots by the collector.)
+<DT> <B> void * GC_REALLOC(void *old, size_t new_size) </b>
+<DD>
+Allocate a new object of the indicated size and copy (a prefix of) the
+old object into the new object.  The old object is reused in place if
+convenient.  If the original object was allocated with GC_malloc_atomic,
+the new object is subject to the same constraints.  If it was allocated
+as an uncollectable object, then the new object is uncollectable, and
+the old object (if different) is deallocated.
+(Use GC_REALLOC with GC_MALLOC, etc.)
+<DT> <B> void GC_FREE(void *dead) </b>
+<DD>
+Explicitly deallocate an object.  Typically not useful for small
+collectable objects.  (Use GC_FREE with GC_MALLOC, etc.)
+<DT> <B> void * GC_MALLOC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b>
+<DD>
+<DT> <B> void * GC_MALLOC_ATOMIC_IGNORE_OFF_PAGE(size_t <I>nbytes</i>) </b>
+<DD>
+Analogous to GC_MALLOC and GC_MALLOC_ATOMIC, except that the client
+guarantees that as long
+as the resulting object is of use, a pointer is maintained to someplace
+inside the first 512 bytes of the object.  This pointer should be declared
+volatile to avoid interference from compiler optimizations.
+(Other nonvolatile pointers to the object may exist as well.)
+This is the
+preferred way to allocate objects that are likely to be > 100KBytes in size.
+It greatly reduces the risk that such objects will be accidentally retained
+when they are no longer needed.  Thus space usage may be significantly reduced.
+<DT> <B> void GC_gcollect(void) </b>
+<DD>
+Explicitly force a garbage collection.
+<DT> <B> void GC_enable_incremental(void) </b>
+<DD>
+Cause the garbage collector to perform a small amount of work
+every few invocations of GC_malloc or the like, instead of performing
+an entire collection at once.  This is likely to increase total
+running time.  It will improve response on a platform that either has
+suitable support in the garbage collector (Irix and most other Unix
+versions, win32 if the collector was suitably built) or if "stubborn"
+allocation is used (see <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a>).
+On many platforms this interacts poorly with system calls 
+that write to the garbage collected heap.
+<DT> <B> GC_warn_proc GC_set_warn_proc(GC_warn_proc p) </b>
+<DD>
+Replace the default procedure used by the collector to print warnings.
+The collector
+may otherwise write to stderr, most commonly because GC_malloc was used
+in a situation in which GC_malloc_ignore_off_page would have been more
+appropriate.  See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details.
+<DT> <B> void GC_register_finalizer(...) </b>
+<DD>
+Register a function to be called when an object becomes inaccessible.
+This is often useful as a backup method for releasing system resources
+(<I>e.g.</i> closing files) when the object referencing them becomes
+inaccessible.
+It is not an acceptable method to perform actions that must be performed
+in a timely fashion.
+See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> for details of the interface.
+See <A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/finalization.html">here</a> for a more detailed discussion
+of the design.
+<P>
+Note that an object may become inaccessible before client code is done
+operating on its fields.  Suitable synchronization is usually required.
+See <A HREF="http://portal.acm.org/citation.cfm?doid=604131.604153">here</a>
+or <A HREF="http://www.hpl.hp.com/techreports/2002/HPL-2002-335.html">here</a>
+for details.
+</dl>
+<P>
+If you are concerned with multiprocessor performance and scalability,
+you should consider enabling and using thread local allocation (<I>e.g.</i>
+GC_LOCAL_MALLOC, see <TT>gc_local_alloc.h</tt>).  If your platform
+supports it, you should build the collector with parallel marking support
+(-DPARALLEL_MARK, or --enable-parallel-mark).
+<P>
+If the collector is used in an environment in which pointer location
+information for heap objects is easily available, this can be passed on
+to the collector using the interfaces in either <TT>gc_typed.h</tt>
+or <TT>gc_gcj.h</tt>.
+<P>
+The collector distribution also includes a <B>string package</b> that takes
+advantage of the collector.  For details see
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/cordh.txt">cord.h</a>
+
+<H1>C++ Interface</h1>
+There are three distinct ways to use the collector from C++:
+<DL>
+<DT> <B> STL allocators </b>
+<DD>
+Users of the <A HREF="http://www.sgi.com/tech/stl">SGI extended STL</a>
+can include <TT>new_gc_alloc.h</tt> before including
+STL header files.
+(<TT>gc_alloc.h</tt> corresponds to now obsolete versions of the
+SGI STL.)
+This defines SGI-style allocators
+<UL>
+<LI> alloc
+<LI> single_client_alloc
+<LI> gc_alloc
+<LI> single_client_gc_alloc
+</ul>
+which may be used either directly to allocate memory or to instantiate
+container templates.  The first two allocate uncollectable but traced
+memory, while the second two allocate collectable memory.
+The single_client versions are not safe for concurrent access by
+multiple threads, but are faster.
+<P>
+For an example, click <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_alloc_exC.txt">here</a>.
+<P>
+Recent versions of the collector also include a more standard-conforming
+allocator implementation in <TT>gc_allocator.h</tt>.  It defines
+<UL>
+<LI> traceable_allocator
+<LI> gc_allocator
+</ul>
+Again the former allocates uncollectable but traced memory.
+This should work with any fully standard-conforming C++ compiler.
+<DT> <B> Class inheritance based interface </b>
+<DD>
+Users may include gc_cpp.h and then cause members of certain classes to
+be allocated in garbage collectable memory by inheriting from class gc.
+For details see <A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gc_cpph.txt">gc_cpp.h</a>.
+<DT> <B> C interface </b>
+<DD>
+It is also possible to use the C interface from 
+<A HREF="http://hpl.hp.com/personal/Hans_Boehm/gc/gc_source/gch.txt">gc.h</a> directly.
+On platforms which use malloc to implement ::new, it should usually be possible
+to use a version of the collector that has been compiled as a malloc
+replacement.  It is also possible to replace ::new and other allocation
+functions suitably.
+<P>
+Note that user-implemented small-block allocation often works poorly with
+an underlying garbage-collected large block allocator, since the collector
+has to view all objects accessible from the user's free list as reachable.
+This is likely to cause problems if GC_malloc is used with something like
+the original HP version of STL.
+This approach works with the SGI versions of the STL only if the
+<TT>malloc_alloc</tt> allocator is used.
+</dl>
+</body>
+</html>
diff --git a/doc/scale.html b/doc/scale.html
new file mode 100644
index 00000000..2e70148d
--- /dev/null
+++ b/doc/scale.html
@@ -0,0 +1,210 @@
+<HTML>
+<HEAD>
+<TITLE>Garbage collector scalability</TITLE>
+</HEAD>
+<BODY>
+<H1>Garbage collector scalability</h1>
+In its default configuration, the Boehm-Demers-Weiser garbage collector
+is not thread-safe.  It can be made thread-safe for a number of environments
+by building the collector with the appropriate
+<TT>-D</tt><I>XXX</i><TT>_THREADS</tt> compilation
+flag.  This has primarily two effects:
+<OL>
+<LI> It causes the garbage collector to stop all other threads when
+it needs to see a consistent memory state.
+<LI> It causes the collector to acquire a lock around essentially all
+allocation and garbage collection activity.
+</ol>
+Since a single lock is used for all allocation-related activity, only one
+thread can be allocating or collecting at one point.  This inherently
+limits performance of multi-threaded applications on multiprocessors.
+<P>
+On most platforms, the allocator/collector lock is implemented as a
+spin lock with exponential back-off.  Longer wait times are implemented
+by yielding and/or sleeping.  If a collection is in progress, the pure
+spinning stage is skipped.  This has the advantage that uncontested and
+thus most uniprocessor lock acquisitions are very cheap.  It has the
+disadvantage that the application may sleep for small periods of time
+even when there is work to be done.  And threads may be unnecessarily
+woken up for short periods.  Nonetheless, this scheme empirically
+outperforms native queue-based mutual exclusion implementations in most
+cases, sometimes drastically so.
+<H2>Options for enhanced scalability</h2>
+Version 6.0 of the collector adds two facilities to enhance collector
+scalability on multiprocessors.  As of 6.0alpha1, these are supported 
+only under Linux on X86 and IA64 processors, though ports to other
+otherwise supported Pthreads platforms should be straightforward.
+They are intended to be used together.
+<UL>
+<LI>
+Building the collector with <TT>-DPARALLEL_MARK</tt> allows the collector to
+run the mark phase in parallel in multiple threads, and thus on multiple
+processors.  The mark phase typically consumes the large majority of the
+collection time.  Thus this largely parallelizes the garbage collector
+itself, though not the allocation process.  Currently the marking is
+performed by the thread that triggered the collection, together with
+<I>N</i>-1 dedicated
+threads, where <I>N</i> is the number of processors detected by the collector.
+The dedicated threads are created once at initialization time.
+<P>
+A second effect of this flag is to switch to a more concurrent
+implementation of <TT>GC_malloc_many</tt>, so that free lists can be
+built, and memory can be cleared, by more than one thread concurrently.
+<LI>
+Building the collector with -DTHREAD_LOCAL_ALLOC adds support for thread
+local allocation.  It does not, by itself, cause thread local allocation
+to be used.  It simply allows the use of the interface in 
+<TT>gc_local_alloc.h</tt>.
+<P>
+Memory returned from thread-local allocators is completely interchangeable
+with that returned by the standard allocators.  It may be used by other
+threads.  The only difference is that, if the thread allocates enough
+memory of a certain kind, it will build a thread-local free list for
+objects of that kind, and allocate from that.  This greatly reduces
+locking.  The thread-local free lists are refilled using 
+<TT>GC_malloc_many</tt>.
+<P>
+An important side effect of this flag is to replace the default
+spin-then-sleep lock with a spin-then-queue based implementation.
+This <I>reduces performance</i> for the standard allocation functions,
+though it usually improves performance when thread-local allocation is
+used heavily, and thus the number of short-duration lock acquisitions
+is greatly reduced.
+</ul>
+<P>
+The easiest way to switch an application to thread-local allocation is to
+<OL>
+<LI> Define the macro <TT>GC_REDIRECT_TO_LOCAL</tt>,
+and then include the <TT>gc.h</tt>
+header in each client source file.
+<LI> Invoke <TT>GC_thr_init()</tt> before any allocation.
+<LI> Allocate using <TT>GC_MALLOC</tt>, <TT>GC_MALLOC_ATOMIC</tt>,
+and/or <TT>GC_GCJ_MALLOC</tt>.
+</ol>
+<H2>The Parallel Marking Algorithm</h2>
+We use an algorithm similar to
+<A HREF="http://www.yl.is.s.u-tokyo.ac.jp/gc/">that developed by
+Endo, Taura, and Yonezawa</a> at the University of Tokyo.
+However, the data structures and implementation are different,
+and represent a smaller change to the original collector source,
+probably at the expense of extreme scalability.  Some of
+the refinements they suggest, <I>e.g.</i> splitting large
+objects, were also incorporated into our approach.
+<P>
+The global mark stack is transformed into a global work queue.
+Unlike the usual case, it never shrinks during a mark phase.
+The mark threads remove objects from the queue by copying them to a
+local mark stack and changing the global descriptor to zero, indicating
+that there is no more work to be done for this entry.
+This removal
+is done with no synchronization.  Thus it is possible for more than
+one worker to remove the same entry, resulting in some work duplication.
+<P>
+The global work queue grows only if a marker thread decides to
+return some of its local mark stack to the global one.  This
+is done if the global queue appears to be running low, or if
+the local stack is in danger of overflowing.  It does require
+synchronization, but should be relatively rare.
+<P>
+The sequential marking code is reused to process local mark stacks.
+Hence the amount of additional code required for parallel marking
+is minimal.
+<P>
+It should be possible to use generational collection in the presence of the
+parallel collector, by calling <TT>GC_enable_incremental()</tt>.
+This does not result in fully incremental collection, since parallel mark
+phases cannot currently be interrupted, and doing so may be too
+expensive.
+<P>
+Gcj-style mark descriptors do not currently mix with the combination
+of local allocation and incremental collection.  They should work correctly
+with one or the other, but not both.
+<P>
+The number of marker threads is set on startup to the number of
+available processors (or to the value of the <TT>GC_NPROCS</tt>
+environment variable).  If only a single processor is detected,
+parallel marking is disabled.
+<P>
+Note that setting GC_NPROCS to 1 also causes some lock acquisitions inside
+the collector to immediately yield the processor instead of busy waiting
+first.  In the case of a multiprocessor and a client with multiple
+simultaneously runnable threads, this may have disastrous performance
+consequences (e.g. a factor of 10 slowdown). 
+<H2>Performance</h2>
+We conducted some simple experiments with a version of
+<A HREF="gc_bench.html">our GC benchmark</a> that was slightly modified to
+run multiple concurrent client threads in the same address space.
+Each client thread does the same work as the original benchmark, but they share
+a heap.
+This benchmark involves very little work outside of memory allocation.
+This was run with GC 6.0alpha3 on a dual processor Pentium III/500 machine
+under Linux 2.2.12.
+<P>
+Running with a thread-unsafe collector,  the benchmark ran in 9
+seconds.  With the simple thread-safe collector,
+built with <TT>-DLINUX_THREADS</tt>, the execution time
+increased to 10.3 seconds, or 23.5 elapsed seconds with two clients.
+(The times for the <TT>malloc</tt>/<TT>free</tt> version
+with glibc <TT>malloc</tt>
+are 10.51 (standard library, pthreads not linked),
+20.90 (one thread, pthreads linked),
+and 24.55 seconds respectively. The benchmark favors a
+garbage collector, since most objects are small.)
+<P>
+The following table gives execution times for the collector built
+with parallel marking and thread-local allocation support
+(<TT>-DGC_LINUX_THREADS -DPARALLEL_MARK -DTHREAD_LOCAL_ALLOC</tt>).  We tested
+the client using either one or two marker threads, and running
+one or two client threads.  Note that the client uses thread local
+allocation exclusively.  With -DTHREAD_LOCAL_ALLOC the collector
+switches to a locking strategy that is better tuned to less frequent
+lock acquisition.  The standard allocation primitives thus perform
+slightly worse than without -DTHREAD_LOCAL_ALLOC, and should be
+avoided in time-critical code.
+<P>
+(The results using <TT>pthread_mutex_lock</tt>
+directly for allocation locking would have been worse still, at
+least for older versions of linuxthreads.
+With THREAD_LOCAL_ALLOC, we first repeatedly try to acquire the
+lock with pthread_mutex_trylock(), busy-waiting between attempts.
+After a fixed number of attempts, we use pthread_mutex_lock().)
+<P>
+These measurements do not use incremental collection, nor was prefetching
+enabled in the marker.  We used the C version of the benchmark.
+All measurements are in elapsed seconds on an unloaded machine.
+<P>
+<TABLE BORDER ALIGN="CENTER">
+<TR><TH>Number of threads</th><TH>1 marker thread (secs.)</th>
+<TH>2 marker threads (secs.)</th></tr>
+<TR><TD>1 client</td><TD ALIGN="CENTER">10.45</td><TD ALIGN="CENTER">7.85</td></tr>
+<TR><TD>2 clients</td><TD ALIGN="CENTER">19.95</td><TD ALIGN="CENTER">12.3</td></tr>
+</table>
+<P>
+The execution time for the single threaded case is slightly worse than with
+simple locking.  However, the single-threaded benchmark runs faster than
+even the thread-unsafe version if a second processor is available.
+The execution time for two clients with thread-local allocation is
+only 1.4 times the sequential execution time for a single thread in a
+thread-unsafe environment, even though it involves twice the client work.
+That represents close to a
+factor of 2 improvement over the 2 client case with the old collector.
+The old collector clearly
+still suffered from some contention overhead, in spite of the fact that the
+locking scheme had been fairly well tuned.
+<P>
+Full linear speedup (i.e. the same execution time for 1 client on one
+processor as 2 clients on 2 processors)
+is probably not achievable on this kind of
+hardware even with such a small number of processors,
+since the memory system is
+a major constraint for the garbage collector,
+the processors usually share a single memory bus, and thus
+the aggregate memory bandwidth does not increase in
+proportion to the number of processors. 
+<P>
+These results are likely to be very sensitive to both hardware and OS
+issues.  Preliminary experiments with an older Pentium Pro machine running
+an older kernel were far less encouraging.
+
+</body>
+</html>
diff --git a/doc/tree.html b/doc/tree.html
index 89c515da..c46a281c 100644
--- a/doc/tree.html
+++ b/doc/tree.html
@@ -1,13 +1,14 @@
 <HTML>
 <HEAD>
     <TITLE>  Two-Level Tree Structure for Fast Pointer Lookup</TITLE>
-    <AUTHOR> Hans-J. Boehm, Silicon Graphics</author>
+    <AUTHOR> Hans-J. Boehm, Silicon Graphics (now at HP)</author>
 </HEAD>
 <BODY>
 <H1>Two-Level Tree Structure for Fast Pointer Lookup</h1>
 <P>
 The conservative garbage collector described
-<A HREF="gc.html">here</a> uses a 2-level tree
+<A HREF="http://www.hpl.hp.com/personal/Hans_Boehm/gc/">here</a>
+uses a 2-level tree
 data structure to aid in fast pointer identification.
 This data structure is described in a bit more detail here, since
 <OL>
author	Ivan Maidanski <ivmai@mail.ru>	2011-07-26 17:28:12 +0400
committer	Ivan Maidanski <ivmai@mail.ru>	2011-07-26 17:28:12 +0400
commit	111a44f98adde07d205c92656ad9b935ca2a39a8 (patch)
tree	8a0cb4e60f636fd09dd0d2e1a3a7f3a4ac0a1bb0 /doc
parent	f3632431e72d48bc7772b0752e29bb1e2a0901c6 (diff)
download	bdwgc-111a44f98adde07d205c92656ad9b935ca2a39a8.tar.gz