Add h2defs.py and docextract_to_xml.py, removed from pygboject.

* tools/defs_gen/definitions.py: * tools/defs_gen/defsparser.py: * tools/defs_gen/docextract.py: * tools/defs_gen/docextract_to_xml.py: * tools/defs_gen/h2def.py: * tools/defs_gen/scmexpr.py: Add h2def.py and docextract_to_xml.py, and any .py files that they use, because they were removed from pygobject. * tools/Makefile.am: Add these to EXTRA_DIST.
author: Murray Cumming <murrayc@murrayc.com> 2011-07-19 10:58:59 +0200
committer: Murray Cumming <murrayc@murrayc.com> 2011-07-19 11:01:08 +0200
commit: be04a6c229efb204f96dbf9b50359ffe1b5e7d17 (patch)
tree: 2515d0ffd25f6d2dd277b810baa66646831ecdcd
parent: 7a6f7eef1c58d51e870aabada452ce2cb84a8ea5 (diff)
download: glibmm-be04a6c229efb204f96dbf9b50359ffe1b5e7d17.tar.gz
8 files changed, 2125 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index 26868647..81813ce3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2011-07-19  Murray Cumming  <murrayc@murrayc.com>
+
+	Add h2defs.py and docextract_to_xml.py, removed from pygboject.
+
+	* tools/defs_gen/definitions.py:
+	* tools/defs_gen/defsparser.py:
+	* tools/defs_gen/docextract.py:
+	* tools/defs_gen/docextract_to_xml.py:
+	* tools/defs_gen/h2def.py:
+	* tools/defs_gen/scmexpr.py: Add h2def.py and docextract_to_xml.py, 
+	and any .py files that they use, because they were removed from pygobject.
+	* tools/Makefile.am: Add these to EXTRA_DIST.
+	
 2011-07-18  José Alburquerque  <jaalburqu@svn.gnome.org>
 
 	gmmproc: _STRUCT_NOT_HIDDEN: Make macro local to class only.
diff --git a/tools/Makefile.am b/tools/Makefile.am
index d3ada483..d5d0dc4f 100644
--- a/tools/Makefile.am
+++ b/tools/Makefile.am
@@ -54,6 +54,13 @@ extra_defs_gen_generate_defs_gio_LDADD   = $(GIOMM_LIBS) $(lib_LTLIBRARIES)
 AM_CPPFLAGS = -I$(top_builddir) $(GTHREAD_CFLAGS) $(GIOMM_CFLAGS)
 AM_CXXFLAGS = $(GLIBMM_WXXFLAGS)
 
+EXTRA_DIST = defs_gen/definitions.py \
+  defs_gen/defsparser.py \
+  defs_gen/h2def.py \
+  defs_gen/scmexpr.py \
+  defs_gen/docextract.py \
+  defs_gen/docextract_to_xml.py
+
 # Instruct GNU make to delete the targets of a rule after it failed, in
 # order to avoid the complication of handling that situation manually.
 .DELETE_ON_ERROR:
diff --git a/tools/defs_gen/definitions.py b/tools/defs_gen/definitions.py
new file mode 100644
index 00000000..bfb6faff
--- /dev/null
+++ b/tools/defs_gen/definitions.py
@@ -0,0 +1,575 @@
+# -*- Mode: Python; py-indent-offset: 4 -*-
+import copy
+import sys
+
+def get_valid_scheme_definitions(defs):
+    return [x for x in defs if isinstance(x, tuple) and len(x) >= 2]
+
+def unescape(s):
+    s = s.replace('\r\n', '\\r\\n').replace('\t', '\\t')
+    return s.replace('\r', '\\r').replace('\n', '\\n')
+
+def make_docstring(lines):
+    return "(char *) " + '\n'.join(['"%s"' % unescape(s) for s in lines])
+
+# New Parameter class, wich emulates a tuple for compatibility reasons
+class Parameter(object):
+    def __init__(self, ptype, pname, pdflt, pnull, pdir=None):
+        self.ptype = ptype
+        self.pname = pname
+        self.pdflt = pdflt
+        self.pnull = pnull
+        self.pdir = pdir
+
+    def __len__(self): return 4
+    def __getitem__(self, i):
+        return (self.ptype, self.pname, self.pdflt, self.pnull)[i]
+
+    def merge(self, old):
+        if old.pdflt is not None:
+            self.pdflt = old.pdflt
+        if old.pnull is not None:
+            self.pnull = old.pnull
+
+# We currently subclass 'str' to make impact on the rest of codegen as
+# little as possible.  Later we can subclass 'object' instead, but
+# then we must find and adapt all places which expect return types to
+# be strings.
+class ReturnType(str):
+    def __new__(cls, *args, **kwds):
+        return str.__new__(cls, *args[:1])
+    def __init__(self, type_name, optional=False):
+        str.__init__(self)
+        self.optional = optional
+
+# Parameter for property based constructors
+class Property(object):
+    def __init__(self, pname, optional, argname):
+        self.pname = pname
+        self.optional = optional
+        self.argname = argname
+
+    def __len__(self): return 4
+    def __getitem__(self, i):
+        return ('', self.pname, self.optional, self.argname)[i]
+
+    def merge(self, old):
+        if old.optional is not None:
+            self.optional = old.optional
+        if old.argname is not None:
+            self.argname = old.argname
+
+
+class Definition(object):
+    docstring = "NULL"
+
+    def py_name(self):
+        return '%s.%s' % (self.module, self.name)
+
+    py_name = property(py_name)
+
+    def __init__(self, *args):
+        """Create a new defs object of this type.  The arguments are the
+        components of the definition"""
+        raise RuntimeError("this is an abstract class")
+
+    def merge(self, old):
+        """Merge in customisations from older version of definition"""
+        raise RuntimeError("this is an abstract class")
+
+    def write_defs(self, fp=sys.stdout):
+        """write out this definition in defs file format"""
+        raise RuntimeError("this is an abstract class")
+
+    def guess_return_value_ownership(self):
+        "return 1 if caller owns return value"
+        if getattr(self, 'is_constructor_of', False):
+            self.caller_owns_return = True
+        elif self.ret in ('char*', 'gchar*', 'string'):
+            self.caller_owns_return = True
+        else:
+            self.caller_owns_return = False
+
+
+class ObjectDef(Definition):
+    def __init__(self, name, *args):
+        self.name = name
+        self.module = None
+        self.parent = None
+        self.c_name = None
+        self.typecode = None
+        self.fields = []
+        self.implements = []
+        self.class_init_func = None
+        self.has_new_constructor_api = False
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.module = arg[1]
+            elif arg[0] == 'docstring':
+                self.docstring = make_docstring(arg[1:])
+            elif arg[0] == 'parent':
+                self.parent = arg[1]
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'fields':
+                for parg in arg[1:]:
+                    self.fields.append((parg[0], parg[1]))
+            elif arg[0] == 'implements':
+                self.implements.append(arg[1])
+    def merge(self, old):
+        # currently the .h parser doesn't try to work out what fields of
+        # an object structure should be public, so we just copy the list
+        # from the old version ...
+        self.fields = old.fields
+        self.implements = old.implements
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-object ' + self.name + '\n')
+        if self.module:
+            fp.write('  (in-module "' + self.module + '")\n')
+        if self.parent != (None, None):
+            fp.write('  (parent "' + self.parent + '")\n')
+        for interface in self.implements:
+            fp.write('  (implements "' + interface + '")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.fields:
+            fp.write('  (fields\n')
+            for (ftype, fname) in self.fields:
+                fp.write('    \'("' + ftype + '" "' + fname + '")\n')
+            fp.write('  )\n')
+        fp.write(')\n\n')
+
+class InterfaceDef(Definition):
+    def __init__(self, name, *args):
+        self.name = name
+        self.module = None
+        self.c_name = None
+        self.typecode = None
+        self.vtable = None
+        self.fields = []
+        self.interface_info = None
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.module = arg[1]
+            elif arg[0] == 'docstring':
+                self.docstring = make_docstring(arg[1:])
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'vtable':
+                self.vtable = arg[1]
+        if self.vtable is None:
+            self.vtable = self.c_name + "Iface"
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-interface ' + self.name + '\n')
+        if self.module:
+            fp.write('  (in-module "' + self.module + '")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        fp.write(')\n\n')
+
+class EnumDef(Definition):
+    def __init__(self, name, *args):
+        self.deftype = 'enum'
+        self.name = name
+        self.in_module = None
+        self.c_name = None
+        self.typecode = None
+        self.values = []
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.in_module = arg[1]
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'values':
+                for varg in arg[1:]:
+                    self.values.append((varg[0], varg[1]))
+    def merge(self, old):
+        pass
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-' + self.deftype + ' ' + self.name + '\n')
+        if self.in_module:
+            fp.write('  (in-module "' + self.in_module + '")\n')
+        fp.write('  (c-name "' + self.c_name + '")\n')
+        fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.values:
+            fp.write('  (values\n')
+            for name, val in self.values:
+                fp.write('    \'("' + name + '" "' + val + '")\n')
+            fp.write('  )\n')
+        fp.write(')\n\n')
+
+class FlagsDef(EnumDef):
+    def __init__(self, *args):
+        apply(EnumDef.__init__, (self,) + args)
+        self.deftype = 'flags'
+
+class BoxedDef(Definition):
+    def __init__(self, name, *args):
+        self.name = name
+        self.module = None
+        self.c_name = None
+        self.typecode = None
+        self.copy = None
+        self.release = None
+        self.fields = []
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.module = arg[1]
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'copy-func':
+                self.copy = arg[1]
+            elif arg[0] == 'release-func':
+                self.release = arg[1]
+            elif arg[0] == 'fields':
+                for parg in arg[1:]:
+                    self.fields.append((parg[0], parg[1]))
+    def merge(self, old):
+        # currently the .h parser doesn't try to work out what fields of
+        # an object structure should be public, so we just copy the list
+        # from the old version ...
+        self.fields = old.fields
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-boxed ' + self.name + '\n')
+        if self.module:
+            fp.write('  (in-module "' + self.module + '")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.copy:
+            fp.write('  (copy-func "' + self.copy + '")\n')
+        if self.release:
+            fp.write('  (release-func "' + self.release + '")\n')
+        if self.fields:
+            fp.write('  (fields\n')
+            for (ftype, fname) in self.fields:
+                fp.write('    \'("' + ftype + '" "' + fname + '")\n')
+            fp.write('  )\n')
+        fp.write(')\n\n')
+
+class PointerDef(Definition):
+    def __init__(self, name, *args):
+        self.name = name
+        self.module = None
+        self.c_name = None
+        self.typecode = None
+        self.fields = []
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.module = arg[1]
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'fields':
+                for parg in arg[1:]:
+                    self.fields.append((parg[0], parg[1]))
+    def merge(self, old):
+        # currently the .h parser doesn't try to work out what fields of
+        # an object structure should be public, so we just copy the list
+        # from the old version ...
+        self.fields = old.fields
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-pointer ' + self.name + '\n')
+        if self.module:
+            fp.write('  (in-module "' + self.module + '")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.fields:
+            fp.write('  (fields\n')
+            for (ftype, fname) in self.fields:
+                fp.write('    \'("' + ftype + '" "' + fname + '")\n')
+            fp.write('  )\n')
+        fp.write(')\n\n')
+
+class MethodDefBase(Definition):
+    def __init__(self, name, *args):
+        dump = 0
+        self.name = name
+        self.ret = None
+        self.caller_owns_return = None
+        self.unblock_threads = None
+        self.c_name = None
+        self.typecode = None
+        self.of_object = None
+        self.params = [] # of form (type, name, default, nullok)
+        self.varargs = 0
+        self.deprecated = None
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'of-object':
+                self.of_object = arg[1]
+            elif arg[0] == 'docstring':
+                self.docstring = make_docstring(arg[1:])
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'return-type':
+                type_name = arg[1]
+                optional = False
+                for prop in arg[2:]:
+                    if prop[0] == 'optional':
+                        optional = True
+                self.ret = ReturnType(type_name, optional)
+            elif arg[0] == 'caller-owns-return':
+                self.caller_owns_return = arg[1] in ('t', '#t')
+            elif arg[0] == 'unblock-threads':
+                self.unblock_threads = arg[1] in ('t', '#t')
+            elif arg[0] == 'parameters':
+                for parg in arg[1:]:
+                    ptype = parg[0]
+                    pname = parg[1]
+                    pdflt = None
+                    pnull = 0
+                    pdir = None
+                    for farg in parg[2:]:
+                        assert isinstance(farg, tuple)
+                        if farg[0] == 'default':
+                            pdflt = farg[1]
+                        elif farg[0] == 'null-ok':
+                            pnull = 1
+                        elif farg[0] == 'direction':
+                            pdir = farg[1]
+                    self.params.append(Parameter(ptype, pname, pdflt, pnull, pdir))
+            elif arg[0] == 'varargs':
+                self.varargs = arg[1] in ('t', '#t')
+            elif arg[0] == 'deprecated':
+                self.deprecated = arg[1]
+            else:
+                sys.stderr.write("Warning: %s argument unsupported.\n"
+                                 % (arg[0]))
+                dump = 1
+        if dump:
+            self.write_defs(sys.stderr)
+
+        if self.caller_owns_return is None and self.ret is not None:
+            self.guess_return_value_ownership()
+
+    def merge(self, old, parmerge):
+        self.caller_owns_return = old.caller_owns_return
+        self.varargs = old.varargs
+        # here we merge extra parameter flags accross to the new object.
+        if not parmerge:
+            self.params = copy.deepcopy(old.params)
+            return
+        for i in range(len(self.params)):
+            ptype, pname, pdflt, pnull = self.params[i]
+            for p2 in old.params:
+                if p2[1] == pname:
+                    self.params[i] = (ptype, pname, p2[2], p2[3])
+                    break
+    def _write_defs(self, fp=sys.stdout):
+        if self.of_object != (None, None):
+            fp.write('  (of-object "' + self.of_object + '")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.caller_owns_return:
+            fp.write('  (caller-owns-return #t)\n')
+        if self.unblock_threads:
+            fp.write('  (unblock_threads #t)\n')
+        if self.ret:
+            fp.write('  (return-type "' + self.ret + '")\n')
+        if self.deprecated:
+            fp.write('  (deprecated "' + self.deprecated + '")\n')
+        if self.params:
+            fp.write('  (parameters\n')
+            for ptype, pname, pdflt, pnull in self.params:
+                fp.write('    \'("' + ptype + '" "' + pname +'"')
+                if pdflt: fp.write(' (default "' + pdflt + '")')
+                if pnull: fp.write(' (null-ok)')
+                fp.write(')\n')
+            fp.write('  )\n')
+        if self.varargs:
+            fp.write('  (varargs #t)\n')
+        fp.write(')\n\n')
+
+
+class MethodDef(MethodDefBase):
+    def __init__(self, name, *args):
+        MethodDefBase.__init__(self, name, *args)
+        for item in ('c_name', 'of_object'):
+            if self.__dict__[item] == None:
+                self.write_defs(sys.stderr)
+                raise RuntimeError("definition missing required %s" % (item,))
+
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-method ' + self.name + '\n')
+        self._write_defs(fp)
+
+class VirtualDef(MethodDefBase):
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-virtual ' + self.name + '\n')
+        self._write_defs(fp)
+
+class FunctionDef(Definition):
+    def __init__(self, name, *args):
+        dump = 0
+        self.name = name
+        self.in_module = None
+        self.is_constructor_of = None
+        self.ret = None
+        self.caller_owns_return = None
+        self.unblock_threads = None
+        self.c_name = None
+        self.typecode = None
+        self.params = [] # of form (type, name, default, nullok)
+        self.varargs = 0
+        self.deprecated = None
+        for arg in get_valid_scheme_definitions(args):
+            if arg[0] == 'in-module':
+                self.in_module = arg[1]
+            elif arg[0] == 'docstring':
+                self.docstring = make_docstring(arg[1:])
+            elif arg[0] == 'is-constructor-of':
+                self.is_constructor_of = arg[1]
+            elif arg[0] == 'c-name':
+                self.c_name = arg[1]
+            elif arg[0] == 'gtype-id':
+                self.typecode = arg[1]
+            elif arg[0] == 'return-type':
+                self.ret = arg[1]
+            elif arg[0] == 'caller-owns-return':
+                self.caller_owns_return = arg[1] in ('t', '#t')
+            elif arg[0] == 'unblock-threads':
+                self.unblock_threads = arg[1] in ('t', '#t')
+            elif arg[0] == 'parameters':
+                for parg in arg[1:]:
+                    ptype = parg[0]
+                    pname = parg[1]
+                    pdflt = None
+                    pnull = 0
+                    for farg in parg[2:]:
+                        if farg[0] == 'default':
+                            pdflt = farg[1]
+                        elif farg[0] == 'null-ok':
+                            pnull = 1
+                    self.params.append(Parameter(ptype, pname, pdflt, pnull))
+            elif arg[0] == 'properties':
+                if self.is_constructor_of is None:
+                    print >> sys.stderr, "Warning: (properties ...) "\
+                          "is only valid for constructors"
+                for prop in arg[1:]:
+                    pname = prop[0]
+                    optional = False
+                    argname = pname
+                    for farg in prop[1:]:
+                        if farg[0] == 'optional':
+                            optional = True
+                        elif farg[0] == 'argname':
+                            argname = farg[1]
+                    self.params.append(Property(pname, optional, argname))
+            elif arg[0] == 'varargs':
+                self.varargs = arg[1] in ('t', '#t')
+            elif arg[0] == 'deprecated':
+                self.deprecated = arg[1]
+            else:
+                sys.stderr.write("Warning: %s argument unsupported\n"
+                                 % (arg[0],))
+                dump = 1
+        if dump:
+            self.write_defs(sys.stderr)
+
+        if self.caller_owns_return is None and self.ret is not None:
+            self.guess_return_value_ownership()
+        for item in ('c_name',):
+            if self.__dict__[item] == None:
+                self.write_defs(sys.stderr)
+                raise RuntimeError("definition missing required %s" % (item,))
+
+    _method_write_defs = MethodDef.__dict__['write_defs']
+
+    def merge(self, old, parmerge):
+        self.caller_owns_return = old.caller_owns_return
+        self.varargs = old.varargs
+        if not parmerge:
+            self.params = copy.deepcopy(old.params)
+            return
+        # here we merge extra parameter flags accross to the new object.
+        def merge_param(param):
+            for old_param in old.params:
+                if old_param.pname == param.pname:
+                    if isinstance(old_param, Property):
+                        # h2def never scans Property's, therefore if
+                        # we have one it was manually written, so we
+                        # keep it.
+                        return copy.deepcopy(old_param)
+                    else:
+                        param.merge(old_param)
+                        return param
+            raise RuntimeError("could not find %s in old_parameters %r" % (
+                param.pname, [p.pname for p in old.params]))
+        try:
+            self.params = map(merge_param, self.params)
+        except RuntimeError:
+            # parameter names changed and we can't find a match; it's
+            # safer to keep the old parameter list untouched.
+            self.params = copy.deepcopy(old.params)
+
+        if not self.is_constructor_of:
+            try:
+                self.is_constructor_of = old.is_constructor_of
+            except AttributeError:
+                pass
+        if isinstance(old, MethodDef):
+            self.name = old.name
+            # transmogrify from function into method ...
+            self.write_defs = self._method_write_defs
+            self.of_object = old.of_object
+            del self.params[0]
+    def write_defs(self, fp=sys.stdout):
+        fp.write('(define-function ' + self.name + '\n')
+        if self.in_module:
+            fp.write('  (in-module "' + self.in_module + '")\n')
+        if self.is_constructor_of:
+            fp.write('  (is-constructor-of "' + self.is_constructor_of +'")\n')
+        if self.c_name:
+            fp.write('  (c-name "' + self.c_name + '")\n')
+        if self.typecode:
+            fp.write('  (gtype-id "' + self.typecode + '")\n')
+        if self.caller_owns_return:
+            fp.write('  (caller-owns-return #t)\n')
+        if self.unblock_threads:
+            fp.write('  (unblock-threads #t)\n')
+        if self.ret:
+            fp.write('  (return-type "' + self.ret + '")\n')
+        if self.deprecated:
+            fp.write('  (deprecated "' + self.deprecated + '")\n')
+        if self.params:
+            if isinstance(self.params[0], Parameter):
+                fp.write('  (parameters\n')
+                for ptype, pname, pdflt, pnull in self.params:
+                    fp.write('    \'("' + ptype + '" "' + pname +'"')
+                    if pdflt: fp.write(' (default "' + pdflt + '")')
+                    if pnull: fp.write(' (null-ok)')
+                    fp.write(')\n')
+                fp.write('  )\n')
+            elif isinstance(self.params[0], Property):
+                fp.write('  (properties\n')
+                for prop in self.params:
+                    fp.write('    \'("' + prop.pname +'"')
+                    if prop.optional: fp.write(' (optional)')
+                    fp.write(')\n')
+                fp.write('  )\n')
+            else:
+                assert False, "strange parameter list %r" % self.params[0]
+        if self.varargs:
+            fp.write('  (varargs #t)\n')
+
+        fp.write(')\n\n')
diff --git a/tools/defs_gen/defsparser.py b/tools/defs_gen/defsparser.py
new file mode 100644
index 00000000..37ba0a2f
--- /dev/null
+++ b/tools/defs_gen/defsparser.py
@@ -0,0 +1,153 @@
+# -*- Mode: Python; py-indent-offset: 4 -*-
+import os, sys
+import scmexpr
+from definitions import BoxedDef, EnumDef, FlagsDef, FunctionDef, \
+     InterfaceDef, MethodDef, ObjectDef, PointerDef, VirtualDef
+
+include_path = ['.']
+
+class IncludeParser(scmexpr.Parser):
+    """A simple parser that follows include statements automatically"""
+    def include(self, input_filename):
+        global include_path
+        if os.path.isabs(input_filename):
+            filename = input_filename
+            # set self.filename to the include name, to handle recursive includes
+            oldfile = self.filename
+            self.filename = filename
+            self.startParsing()
+            self.filename = oldfile
+        else:
+            inc_path = [os.path.dirname(self.filename)] + include_path
+            for filename in [os.path.join(path_entry, input_filename)
+                             for path_entry in inc_path]:
+                if not os.path.exists(filename):
+                    continue
+                # set self.filename to the include name, to handle recursive includes
+                oldfile = self.filename
+                self.filename = filename
+                self.startParsing()
+                self.filename = oldfile
+                break
+            else:
+                raise IOError("%s not found in include path %s" % (input_filename, inc_path))
+
+class DefsParser(IncludeParser):
+    def __init__(self, arg, defines={}):
+        IncludeParser.__init__(self, arg)
+        self.objects = []
+        self.interfaces = []
+        self.enums = []      # enums and flags
+        self.boxes = []      # boxed types
+        self.pointers = []   # pointer types
+        self.functions = []  # functions and methods
+        self.virtuals = []   # virtual methods
+        self.c_name = {}     # hash of c names of functions
+        self.methods = {}    # hash of methods of particular objects
+        self.defines = defines      # -Dfoo=bar options, as dictionary
+
+    def define_object(self, *args):
+        odef = apply(ObjectDef, args)
+        self.objects.append(odef)
+        self.c_name[odef.c_name] = odef
+    def define_interface(self, *args):
+        idef = apply(InterfaceDef, args)
+        self.interfaces.append(idef)
+        self.c_name[idef.c_name] = idef
+    def define_enum(self, *args):
+        edef = apply(EnumDef, args)
+        self.enums.append(edef)
+        self.c_name[edef.c_name] = edef
+    def define_flags(self, *args):
+        fdef = apply(FlagsDef, args)
+        self.enums.append(fdef)
+        self.c_name[fdef.c_name] = fdef
+    def define_boxed(self, *args):
+        bdef = apply(BoxedDef, args)
+        self.boxes.append(bdef)
+        self.c_name[bdef.c_name] = bdef
+    def define_pointer(self, *args):
+        pdef = apply(PointerDef, args)
+        self.pointers.append(pdef)
+        self.c_name[pdef.c_name] = pdef
+    def define_function(self, *args):
+        fdef = apply(FunctionDef, args)
+        self.functions.append(fdef)
+        self.c_name[fdef.c_name] = fdef
+    def define_method(self, *args):
+        mdef = apply(MethodDef, args)
+        self.functions.append(mdef)
+        self.c_name[mdef.c_name] = mdef
+    def define_virtual(self, *args):
+        vdef = apply(VirtualDef, args)
+        self.virtuals.append(vdef)
+    def merge(self, old, parmerge):
+        for obj in self.objects:
+            if old.c_name.has_key(obj.c_name):
+                obj.merge(old.c_name[obj.c_name])
+        for f in self.functions:
+            if old.c_name.has_key(f.c_name):
+                f.merge(old.c_name[f.c_name], parmerge)
+
+    def printMissing(self, old):
+        for obj in self.objects:
+            if not old.c_name.has_key(obj.c_name):
+                obj.write_defs()
+        for f in self.functions:
+            if not old.c_name.has_key(f.c_name):
+                f.write_defs()
+
+    def write_defs(self, fp=sys.stdout):
+        for obj in self.objects:
+            obj.write_defs(fp)
+        for enum in self.enums:
+            enum.write_defs(fp)
+        for boxed in self.boxes:
+            boxed.write_defs(fp)
+        for pointer in self.pointers:
+            pointer.write_defs(fp)
+        for func in self.functions:
+            func.write_defs(fp)
+
+    def find_object(self, c_name):
+        for obj in self.objects:
+            if obj.c_name == c_name:
+                return obj
+        else:
+            raise ValueError('object %r not found' % c_name)
+
+    def find_constructor(self, obj, overrides):
+        for func in self.functions:
+            if isinstance(func, FunctionDef) and \
+               func.is_constructor_of == obj.c_name and \
+               not overrides.is_ignored(func.c_name):
+                return func
+
+    def find_methods(self, obj):
+        objname = obj.c_name
+        return filter(lambda func, on=objname: isinstance(func, MethodDef) and
+                      func.of_object == on, self.functions)
+
+    def find_virtuals(self, obj):
+        objname = obj.c_name
+        retval = filter(lambda func, on=objname: isinstance(func, VirtualDef) and
+                        func.of_object == on, self.virtuals)
+        return retval
+
+    def find_functions(self):
+        return filter(lambda func: isinstance(func, FunctionDef) and
+                      not func.is_constructor_of, self.functions)
+
+    def ifdef(self, *args):
+        if args[0] in self.defines:
+            for arg in args[1:]:
+                #print >> sys.stderr, "-----> Handling conditional definition (%s): %s" % (args[0], arg)
+                self.handle(arg)
+        else:
+            pass
+            #print >> sys.stderr, "-----> Conditional %s is not true" % (args[0],)
+
+    def ifndef(self, *args):
+        if args[0] not in self.defines:
+            for arg in args[1:]:
+                self.handle(arg)
diff --git a/tools/defs_gen/docextract.py b/tools/defs_gen/docextract.py
new file mode 100644
index 00000000..13beeffb
--- /dev/null
+++ b/tools/defs_gen/docextract.py
@@ -0,0 +1,461 @@
+# -*- Mode: Python; py-indent-offset: 4 -*-
+'''Simple module for extracting GNOME style doc comments from C
+sources, so I can use them for other purposes.'''
+
+import sys, os, string, re
+
+# Used to tell if the "Since: ..." portion of the gtkdoc function description
+# should be omitted.  This is useful for some C++ modules such as gstreamermm
+# that wrap C API which is still unstable and including this information would
+# not be useful.
+# This variable is modified from docextract_to_xml based on the --no-since
+# option being specified.
+no_since = False
+
+__all__ = ['extract']
+
+class GtkDoc:
+    def __init__(self):
+        self.name = None
+        self.block_type = '' # The block type ('function', 'signal', 'property')
+        self.params = []
+        self.annotations = []
+        self.description = ''
+        self.ret = ('', []) # (return, annotations)
+    def set_name(self, name):
+        self.name = name
+    def set_type(self, block_type):
+        self.block_type = block_type
+    def get_type(self):
+        return self.block_type
+    def add_param(self, name, description, annotations=[]):
+        if name == '...':
+            name = 'Varargs'
+        self.params.append((name, description, annotations))
+    def append_to_last_param(self, extra):
+        self.params[-1] = (self.params[-1][0], self.params[-1][1] + extra,
+            self.params[-1][2])
+    def append_to_named_param(self, name, extra):
+        for i in range(len(self.params)):
+            if self.params[i][0] == name:
+                self.params[i] = (name, self.params[i][1] + extra,
+                    self.params[i][2])
+                return
+        # fall through to adding extra parameter ...
+        self.add_param(name, extra)
+    def add_annotation(self, annotation):
+        self.annotations.append(annotation)
+    def get_annotations(self):
+        return self.annotations
+    def append_to_description(self, extra):
+        self.description = self.description + extra
+    def get_description(self):
+        return self.description
+    def add_return(self, first_line, annotations=[]):
+        self.ret = (first_line, annotations)
+    def append_to_return(self, extra):
+        self.ret = (self.ret[0] + extra, self.ret[1])
+
+comment_start_pattern = re.compile(r'^\s*/\*\*\s')
+comment_end_pattern = re.compile(r'^\s*\*+/')
+comment_line_lead_pattern = re.compile(r'^\s*\*\s*')
+comment_empty_line_pattern = re.compile(r'^\s*\**\s*$')
+function_name_pattern = re.compile(r'^([a-z]\w*)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
+signal_name_pattern = re.compile(r'^([A-Z]\w+::[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
+property_name_pattern = re.compile(r'^([A-Z]\w+:[a-z0-9-]+)\s*:?(\s*\(.*\)\s*){0,2}\s*$')
+return_pattern = re.compile(r'^@?(returns:|return\s+value:)(.*\n?)$', re.IGNORECASE)
+deprecated_pattern = re.compile(r'^(deprecated\s*:\s*.*\n?)$', re.IGNORECASE)
+rename_to_pattern = re.compile(r'^(rename\s+to)\s*:\s*(.*\n?)$', re.IGNORECASE)
+param_pattern = re.compile(r'^@(\S+)\s*:(.*\n?)$')
+# Used to extract the annotations in the parameter and return descriptions
+# extracted using above [param|return]_pattern patterns.
+annotations_pattern = re.compile(r'^(?:(\s*\(.*\)\s*)*:)')
+# Used to construct the annotation lists.
+annotation_lead_pattern = re.compile(r'^\s*\(\s*(.*?)\s*\)\s*')
+
+# These patterns determine the identifier of the current comment block.  They
+# are grouped in a list for easy determination of block identifiers (in
+# skip_to_identifier).  The function_name_pattern should be tested for last
+# because it always matches signal and property identifiers.
+identifier_patterns = [ signal_name_pattern, property_name_pattern, function_name_pattern ]
+
+# This pattern is to match return sections that forget to have a colon (':')
+# after the initial 'Return' phrase.  It is not included by default in the list
+# of final sections below because a lot of function descriptions begin with
+# 'Returns ...' and the process_description() function would stop right at that
+# first line, thinking it is a return section.
+no_colon_return_pattern = re.compile(r'^@?(returns|return\s+value)\s*(.*\n?)$', re.IGNORECASE)
+since_pattern = re.compile(r'^(since\s*:\s*.*\n?)$', re.IGNORECASE)
+
+# These patterns normally will be encountered after the description.  Knowing
+# the order of their appearance is difficult so this list is used to test when
+# one begins and the other ends when processing the rest of the sections after
+# the description.
+final_section_patterns = [ return_pattern, since_pattern, deprecated_pattern, rename_to_pattern ]
+
+def parse_file(fp, doc_dict):
+    line = fp.readline()
+    while line:
+        cur_doc = GtkDoc()
+        line = skip_to_comment_block(fp, line)
+        line = skip_to_identifier(fp, line, cur_doc)
+        # See if the identifier is found (stored in the current GtkDoc by
+        # skip_to_identifier).  If so, continue reading the rest of the comment
+        # block.
+        if cur_doc.name:
+            line = process_params(fp, line, cur_doc)
+            line = process_description(fp, line, cur_doc)
+            line = process_final_sections(fp, line, cur_doc)
+            # Add the current doc block to the dictionary of doc blocks.
+            doc_dict[cur_doc.name] = cur_doc
+
+# Given a list of annotations as string of the form 
+# '(annotation1) (annotation2) ...' return a list of annotations of the form
+# [ (name1, value1), (name2, value2) ... ].  Not all annotations have values so
+# the values in the list of tuples could be empty ('').
+def get_annotation_list(annotations):
+    annotation_list = []
+    while annotations:
+        match = annotation_lead_pattern.match(annotations)
+        if match:
+            annotation_contents = match.group(1)
+            name, split, value = annotation_contents.strip().partition(' ')
+            annotation_list.append((name, value))
+            # Remove first occurrence to continue processing.
+            annotations = annotation_lead_pattern.sub('', annotations)
+        else:
+            break
+    return annotation_list
+
+# Given a currently read line, test that line and continue reading until the
+# beginning of a comment block is found or eof is reached.  Return the last
+# read line.
+def skip_to_comment_block(fp, line):
+    while line:
+        if comment_start_pattern.match(line):
+            break
+        line = fp.readline()
+    return line
+
+# Given the current line in a comment block, continue skipping lines until a
+# non-blank line in the comment block is found or until the end of the block
+# (or eof) is reached.  Returns the line where reading stopped.
+def skip_to_nonblank(fp, line):
+    while line:
+        if not comment_empty_line_pattern.match(line):
+            break
+        line = fp.readline()
+        # Stop processing if eof or end of comment block is reached.
+        if not line or comment_end_pattern.match(line):
+            break
+    return line
+
+# Given the first line of a comment block (the '/**'), see if the next
+# non-blank line is the identifier of the comment block.  Stop processing if
+# the end of the block or eof is reached.  Store the identifier (if there is
+# one) and its type ('function', 'signal' or 'property') in the given GtkDoc.
+# Return the line where the identifier is found or the line that stops the
+# processing (if eof or the end of the comment block is found first).
+def skip_to_identifier(fp, line, cur_doc):
+    # Skip the initial comment block line ('/**') if not eof.
+    if line: line = fp.readline()
+
+    # Now skip empty lines.
+    line = skip_to_nonblank(fp, line)
+
+    # See if the first non-blank line is the identifier.
+    if line and not comment_end_pattern.match(line):
+        # Remove the initial ' * ' in comment block line and see if there is an
+        # identifier.
+        line = comment_line_lead_pattern.sub('', line)
+        for pattern in identifier_patterns:
+            match = pattern.match(line)
+            if match:
+                # Set the GtkDoc name.
+                cur_doc.set_name(match.group(1))
+                # Get annotations and add them to the GtkDoc.
+                annotations = get_annotation_list(match.group(2))
+                for annotation in annotations:
+                    cur_doc.add_annotation(annotation)
+                # Set the GtkDoc type.
+                if pattern == signal_name_pattern:
+                    cur_doc.set_type('signal')
+                elif pattern == property_name_pattern:
+                    cur_doc.set_type('property')
+                elif pattern == function_name_pattern:
+                    cur_doc.set_type('function')
+                return line
+    return line
+
+# Given a currently read line (presumably the identifier line), read the next
+# lines, testing to see if the lines are part of parameter descriptions.  If
+# so, store the parameter descriptions in the given doc block.  Stop on eof and
+# return the last line that stops the processing.
+def process_params(fp, line, cur_doc):
+    # Skip the identifier line if not eof.  Also skip any blank lines in the
+    # comment block.  Return if eof or the end of the comment block are
+    # encountered.
+    if line: line = fp.readline()
+    line = skip_to_nonblank(fp, line)
+    if not line or comment_end_pattern.match(line):
+        return line
+
+    # Remove initial ' * ' in first non-empty comment block line.
+    line = comment_line_lead_pattern.sub('', line)
+
+    # Now process possible parameters as long as no eof or the end of the
+    # param section is not reached (which could be triggered by anything that
+    # doesn't match a '@param:..." line, even the end of the comment block).
+    match = param_pattern.match(line)
+    while line and match:
+        description = match.group(2)
+
+        # First extract the annotations from the description and save them.
+        annotations = []
+        annotation_match = annotations_pattern.match(description)
+        if annotation_match:
+            annotations = get_annotation_list(annotation_match.group(1))
+            # Remove the annotations from the description
+            description = annotations_pattern.sub('', description)
+
+        # Default to appending lines to current parameter.
+        append_func = cur_doc.append_to_last_param
+
+        # See if the return has been included as part of the parameter
+        # section and make sure that lines are added to the GtkDoc return if
+        # so.
+        if match.group(1).lower() == "returns":
+            cur_doc.add_return(description, annotations)
+            append_func = cur_doc.append_to_return
+        # If not, just add it as a regular parameter.
+        else:
+            cur_doc.add_param(match.group(1), description, annotations)
+
+        # Now read lines and append them until next parameter, beginning of
+        # description (an empty line), the end of the comment block or eof.
+        line = fp.readline()
+        while line:
+            # Stop processing if end of comment block or a blank comment line
+            # is encountered.
+            if comment_empty_line_pattern.match(line) or \
+                    comment_end_pattern.match(line):
+                break
+
+            # Remove initial ' * ' in comment block line.
+            line = comment_line_lead_pattern.sub('', line)
+
+            # Break from current param processing if a new one is
+            # encountered.
+            if param_pattern.match(line): break;
+
+            # Otherwise, just append the current line and get the next line.
+            append_func(line)
+            line = fp.readline()
+
+        # Re-evaluate match for while condition
+        match = param_pattern.match(line)
+
+    # End by returning the current line.
+    return line
+
+# Having processed parameters, read the following lines into the description of
+# the current doc block until the end of the comment block, the end of file or
+# a return section is encountered.
+def process_description(fp, line, cur_doc):
+    # First skip empty lines returning on eof or end of comment block.
+    line = skip_to_nonblank(fp, line)
+    if not line or comment_end_pattern.match(line):
+        return line
+
+    # Remove initial ' * ' in non-empty comment block line.
+    line = comment_line_lead_pattern.sub('', line)
+
+    # Also remove possible 'Description:' prefix.
+    if line[:12] == 'Description:': line = line[12:]
+
+    # Used to tell if the previous line was blank and a return section
+    # uncommonly marked with 'Returns ...' instead of 'Returns: ...'  has
+    # started (assume it is non-empty to begin with).
+    prev_line = 'non-empty'
+
+    # Now read lines until a new section (like a return or a since section) is
+    # encountered.
+    while line:
+        # See if the description section has ended (if the line begins with
+        # 'Returns ...' and the previous line was empty -- this loop replaces
+        # empty lines with a newline).
+        if no_colon_return_pattern.match(line) and prev_line == '\n':
+            return line
+        # Or if one of the patterns of the final sections match
+        for pattern in final_section_patterns:
+            if pattern.match(line):
+                return line
+
+        # If not, append lines to description in the doc comment block.
+        cur_doc.append_to_description(line)
+
+        prev_line = line
+        line = fp.readline()
+
+        # Stop processing on eof or at the end of comment block.
+        if not line or comment_end_pattern.match(line):
+            return line
+
+        # Remove initial ' * ' in line so that the text can be appended to the
+        # description of the comment block and make sure that if the line is
+        # empty it be interpreted as a newline.
+        line = comment_line_lead_pattern.sub('', line)
+        if not line: line = '\n'
+
+# Given the line that ended the description (the first line of one of the final
+# sections) process the final sections ('Returns:', 'Since:', etc.) until the
+# end of the comment block or eof.  Return the line that ends the processing.
+def process_final_sections(fp, line, cur_doc):
+    while line and not comment_end_pattern.match(line):
+        # Remove leading ' * ' from current non-empty comment line.
+        line = comment_line_lead_pattern.sub('', line)
+        # Temporarily append the no colon return pattern to the final section
+        # patterns now that the description has been processed.  It will be
+        # removed after the for loop below executes so that future descriptions
+        # that begin with 'Returns ...' are not interpreted as a return
+        # section.
+        final_section_patterns.append(no_colon_return_pattern)
+        for pattern in final_section_patterns:
+            match = pattern.match(line)
+            if match:
+                if pattern == return_pattern or \
+                        pattern == no_colon_return_pattern:
+                    # Dealing with a 'Returns:' so first extract the
+                    # annotations from the description and save them.
+                    description = match.group(2)
+                    annotations = []
+                    annotation_match = \
+                            annotations_pattern.match(description)
+                    if annotation_match:
+                        annotations = \
+                                get_annotation_list(annotation_match.group(1))
+                        # Remove the annotations from the description
+                        description = annotations_pattern.sub('', description)
+
+                    # Now add the return.
+                    cur_doc.add_return(description, annotations)
+                    # In case more lines need to be appended.
+                    append_func = cur_doc.append_to_return
+                elif pattern == rename_to_pattern:
+                    # Dealing with a 'Rename to:' section (GObjectIntrospection
+                    # annotation) so no further lines will be appended but this
+                    # single one (and only to the annotations).
+                    append_func = None
+                    cur_doc.add_annotation((match.group(1),
+                            match.group(2)))
+                else:
+                    # For all others ('Since:' and 'Deprecated:') just append
+                    # the line to the description for now.
+                    # But if --no-since is specified, don't append it.
+                    if no_since and pattern == since_pattern:
+                        pass
+                    else:
+                        cur_doc.append_to_description(line)
+
+                    # In case more lines need to be appended.
+                    append_func = cur_doc.append_to_description
+
+                # Stop final section pattern matching for loop since a match
+                # has already been found.
+                break
+
+        # Remove the no colon return pattern (which was temporarily added in
+        # the just executed loop) from the list of final section patterns.
+        final_section_patterns.pop()
+
+        line = fp.readline()
+
+        # Now continue appending lines to current section until a new one is
+        # found or an eof or the end of the comment block is encountered.
+        finished = False
+        while not finished and line and \
+                not comment_end_pattern.match(line):
+            # Remove leading ' * ' from line and make sure that if it is empty,
+            # it be interpreted as a newline.
+            line = comment_line_lead_pattern.sub('', line)
+            if not line: line = '\n'
+
+            for pattern in final_section_patterns:
+                if pattern.match(line):
+                    finished = True
+                    break
+
+            # Break out of loop if a new section is found (determined in above
+            # inner loop).
+            if finished: break
+
+            # Now it's safe to append line.
+            if append_func: append_func(line)
+
+            # Get the next line to continue processing.
+            line = fp.readline()
+
+    return line
+
+def parse_dir(dir, doc_dict):
+    for file in os.listdir(dir):
+        if file in ('.', '..'): continue
+        path = os.path.join(dir, file)
+        if os.path.isdir(path):
+            parse_dir(path, doc_dict)
+        if len(file) > 2 and file[-2:] == '.c':
+            sys.stderr.write("Processing " + path + '\n')
+            parse_file(open(path, 'r'), doc_dict)
+
+def extract(dirs, doc_dict=None):
+    if not doc_dict: doc_dict = {}
+    for dir in dirs:
+        parse_dir(dir, doc_dict)
+    return doc_dict
+
+tmpl_section_pattern = re.compile(r'^<!-- ##### (\w+) (\w+) ##### -->$')
+def parse_tmpl(fp, doc_dict):
+    cur_doc = None
+
+    line = fp.readline()
+    while line:
+        match = tmpl_section_pattern.match(line)
+        if match:
+            cur_doc = None  # new input shouldn't affect the old doc dict
+            sect_type = match.group(1)
+            sect_name = match.group(2)
+
+            if sect_type == 'FUNCTION':
+                cur_doc = doc_dict.get(sect_name)
+                if not cur_doc:
+                    cur_doc = GtkDoc()
+                    cur_doc.set_name(sect_name)
+                    doc_dict[sect_name] = cur_doc
+        elif line == '<!-- # Unused Parameters # -->\n':
+            cur_doc = None # don't worry about unused params.
+        elif cur_doc:
+            if line[:10] == '@Returns: ':
+                if string.strip(line[10:]):
+                    cur_doc.append_to_return(line[10:])
+            elif line[0] == '@':
+                pos = string.find(line, ':')
+                if pos >= 0:
+                    cur_doc.append_to_named_param(line[1:pos], line[pos+1:])
+                else:
+                    cur_doc.append_to_description(line)
+            else:
+                cur_doc.append_to_description(line)
+
+        line = fp.readline()
+
+def extract_tmpl(dirs, doc_dict=None):
+    if not doc_dict: doc_dict = {}
+    for dir in dirs:
+        for file in os.listdir(dir):
+            if file in ('.', '..'): continue
+            path = os.path.join(dir, file)
+            if os.path.isdir(path):
+                continue
+            if len(file) > 2 and file[-2:] == '.sgml':
+                parse_tmpl(open(path, 'r'), doc_dict)
+    return doc_dict
diff --git a/tools/defs_gen/docextract_to_xml.py b/tools/defs_gen/docextract_to_xml.py
new file mode 100755
index 00000000..94b54b19
--- /dev/null
+++ b/tools/defs_gen/docextract_to_xml.py
@@ -0,0 +1,142 @@
+#!/usr/bin/env python
+# -*- Mode: Python; py-indent-offset: 4 -*-
+#
+# This litte script outputs the C doc comments to an XML format.
+# So far it's only used by gtkmm (The C++ bindings). Murray Cumming.
+# Usage example:
+# # ./docextract_to_xml.py -s /gnome/head/cvs/gtk+/gtk/ -s /gnome/head/cvs/gtk+/docs/reference/gtk/tmpl/ > gtk_docs.xml
+
+import getopt
+import re
+import string
+import sys
+
+import docextract
+
+def usage():
+    sys.stderr.write('usage: docextract_to_xml.py ' +
+        '[-s /src/dir | --source-dir=/src/dir] ' +
+        '[-a | --with-annotations] [-p | --with-properties] ' +
+        '[-i | --with-signals ] [-n | --no-since]\n')
+    sys.exit(1)
+
+# Translates special texts to &... HTML acceptable format.  Also replace
+# occurrences of '/*' and '*/' with '/ *' and '* /' respectively to avoid
+# comment errors (note the spaces).  Some function descriptions include C++
+# multi-line comments which cause errors when the description is included in a
+# C++ Doxygen comment block.
+def escape_text(unescaped_text):
+    # Escape every "&" not part of an entity reference
+    escaped_text = re.sub(r'&(?![A-Za-z]+;)', '&amp;', unescaped_text)
+
+    # These weird entities turn up in the output...
+    escaped_text = string.replace(escaped_text, '&mdash;', '&#8212;')
+    escaped_text = string.replace(escaped_text, '&ast;', '*')
+    escaped_text = string.replace(escaped_text, '&percnt;', '%')
+    escaped_text = string.replace(escaped_text, '&commat;', '@')
+    escaped_text = string.replace(escaped_text, '&num;', '&#35;')
+    escaped_text = string.replace(escaped_text, '&nbsp;', '&#160;')
+    # This represents a '/' before or after an '*' so replace with slash but
+    # with spaces.
+    escaped_text = string.replace(escaped_text, '&sol;', ' / ')
+
+    # Escape for both tag contents and attribute values
+    escaped_text = string.replace(escaped_text, '<', '&lt;')
+    escaped_text = string.replace(escaped_text, '>', '&gt;')
+    escaped_text = string.replace(escaped_text, '"', '&quot;')
+
+    # Replace C++ comment begin and ends to ones that don't affect Doxygen.
+    escaped_text = string.replace(escaped_text, '/*', '/ *')
+    escaped_text = string.replace(escaped_text, '*/', '* /')
+
+    return escaped_text
+
+def print_annotations(annotations):
+    for annotation in annotations:
+        print "<annotation name=" + annotation[0] +  ">" + \
+                escape_text(annotation[1]) + "</annotation>"
+
+if __name__ == '__main__':
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "d:s:o:apin",
+                                   ["source-dir=", "with-annotations",
+                                     "with-properties", "with-signals",
+                                     "no-since"])
+    except getopt.error, e:
+        sys.stderr.write('docextract_to_xml.py: %s\n' % e)
+        usage()
+    source_dirs = []
+    with_annotations = False
+    with_signals = False
+    with_properties = False
+    for opt, arg in opts:
+        if opt in ('-s', '--source-dir'):
+            source_dirs.append(arg)
+        if opt in ('-a', '--with-annotations'):
+            with_annotations = True
+        if opt in ('-p', '--with-properties'):
+            with_properties = True
+        if opt in ('-i', '--with-signals'):
+            with_signals = True
+        if opt in ('-n', '--no-since'):
+            docextract.no_since = True
+    if len(args) != 0:
+        usage()
+
+    docs = docextract.extract(source_dirs);
+    docextract.extract_tmpl(source_dirs, docs); #Try the tmpl sgml files too.
+
+    # print d.docs
+
+    if docs:
+
+        print "<root>"
+
+        for name, value in sorted(docs.items()):
+            # Get the type of comment block ('function', 'signal' or
+            # 'property') (the value is a GtkDoc).
+            block_type = value.get_type()
+
+            # Skip signals if the option was not specified.
+            if block_type == 'signal' and not with_signals:
+                continue
+            # Likewise for properties.
+            elif block_type == 'property' and not with_properties:
+                continue
+
+            print "<" + block_type + " name=\"" + escape_text(name) + "\">"
+
+            print "<description>"
+            print escape_text(value.get_description())
+            print "</description>"
+
+            # Loop through the parameters if not dealing with a property:
+            if block_type != 'property':
+                print "<parameters>"
+                for name, description, annotations in value.params:
+                        print "<parameter name=\"" + escape_text(name) + "\">"
+                        print "<parameter_description>" + escape_text(description) + "</parameter_description>"
+
+                        if with_annotations:
+                            print_annotations(annotations)
+
+                        print "</parameter>"
+
+                print "</parameters>"
+
+                # Show the return-type (also if not dealing with a property):
+                if with_annotations:
+                    print "<return>"
+                    print "<return_description>" + escape_text(value.ret[0]) + \
+                            "</return_description>"
+                    print_annotations(value.ret[1])
+                    print "</return>"
+                else:
+                    print "<return>" + escape_text(value.ret[0]) + "</return>"
+
+            if with_annotations:
+                print_annotations(value.get_annotations())
+
+            print "</" + block_type + ">\n"
+
+        print "</root>"
diff --git a/tools/defs_gen/h2def.py b/tools/defs_gen/h2def.py
new file mode 100755
index 00000000..17617fac
--- /dev/null
+++ b/tools/defs_gen/h2def.py
@@ -0,0 +1,631 @@
+#!/usr/bin/env python
+# -*- Mode: Python; py-indent-offset: 4 -*-
+# GPL'ed
+# Toby D. Reeves <toby@max.rl.plh.af.mil>
+#
+# Modified by James Henstridge <james@daa.com.au> to output stuff in
+# Havoc's new defs format.  Info on this format can be seen at:
+#   http://mail.gnome.org/archives/gtk-devel-list/2000-January/msg00070.html
+# Updated to be PEP-8 compatible and refactored to use OOP
+#
+# Scan the given public .h files of a GTK module (or module using
+# GTK object conventions) and generates a set of scheme defs.
+#
+# h2def searches through a header file looking for function prototypes and
+# generates a scheme style defenition for each prototype.
+# Basically the operation of h2def is:
+#
+# - read each .h file into a buffer which is scrubbed of extraneous data
+# - find all object defenitions:
+#   - find all structures that may represent a GtkObject
+#   - find all structures that might represent a class
+#   - find all structures that may represent a GtkObject subclass
+#   - find all structures that might represent a class/Iface inherited from
+#     GTypeInterface
+# - find all enum defenitions
+# - write out the defs
+#
+# The command line options are:
+#
+#   -s --separate    Create separate files for objects and function/method defs
+#                    using the given name as the base name (optional). If this
+#                    is not specified the combined object and function defs
+#                    will be output to sys.stdout.
+#   -f --defsfilter  Extract defs from the given file to filter the output defs
+#                    that is don't output defs that are defined in the
+#                    defsfile. More than one deffile may be specified.
+#   -m --modulename  The prefix to be stripped from the front of function names
+#                    for the given module
+#   -n --namespace   The module or namespace name to be used, for example
+#                    WebKit where h2def is unable to detect the module name
+#                    automatically. it also sets the gtype-id prefix.
+#   --onlyenums      Only produce defs for enums and flags
+#   --onlyobjdefs    Only produce defs for objects
+#   -v               Verbose output
+#
+# Examples:
+#
+# python h2def.py /usr/local/include/pango-1.0/pango/*.h >/tmp/pango.defs
+#
+# - Outputs all defs for the pango module.
+#
+# python h2def.py -m gdk -s /tmp/gdk-2.10 \
+#            -f /usr/tmp/pygtk/gtk/gdk-base.defs \
+#            /usr/local/include/gtk-2.0/gdk/*.h \
+#            /usr/local/include/gtk-2.0/gdk-pixbuf/*.h
+#
+# - Outputs the gdk module defs that are not contained in the defs file
+#   /usr/tmp/pygtk/gtk/gdk-base.defs. Two output files are created:
+#   /tmp/gdk-2.10-types.defs and /tmp/gdk-2.10.defs.
+#
+# python h2def.py -n WebKit /usr/incude/webkit-1.0/webkit/*.h \
+#            >/tmp/webkit.defs
+#
+# - Outputs all the defs for webkit module, setting the module name to WebKit
+#   and the gtype-id prefix to WEBKIT_ which can't be detected automatically.
+#
+
+import getopt
+import os
+import re
+import string
+import sys
+
+import defsparser
+
+# ------------------ Create typecodes from typenames ---------
+
+_upperstr_pat1 = re.compile(r'([^A-Z])([A-Z])')
+_upperstr_pat2 = re.compile(r'([A-Z][A-Z])([A-Z][0-9a-z])')
+_upperstr_pat3 = re.compile(r'^([A-Z])([A-Z])')
+
+def to_upper_str(name):
+    """Converts a typename to the equivalent upercase and underscores
+    name.  This is used to form the type conversion macros and enum/flag
+    name variables"""
+    name = _upperstr_pat1.sub(r'\1_\2', name)
+    name = _upperstr_pat2.sub(r'\1_\2', name)
+    name = _upperstr_pat3.sub(r'\1_\2', name, count=1)
+    return string.upper(name)
+
+def typecode(typename, namespace=None):
+    """create a typecode (eg. GTK_TYPE_WIDGET) from a typename"""
+    if namespace:
+      return string.replace(string.upper(namespace) + "_" + to_upper_str(typename[len(namespace):]), '_', '_TYPE_', 1)
+
+    return string.replace(to_upper_str(typename), '_', '_TYPE_', 1)
+
+
+# ------------------ Find object definitions -----------------
+# Strips the comments from buffer
+def strip_comments(buf):
+    parts = []
+    lastpos = 0
+    while 1:
+        pos = string.find(buf, '/*', lastpos)
+        if pos >= 0:
+            parts.append(buf[lastpos:pos])
+            pos = string.find(buf, '*/', pos)
+            if pos >= 0:
+                lastpos = pos + 2
+            else:
+                break
+        else:
+            parts.append(buf[lastpos:])
+            break
+    return string.join(parts, '')
+
+# Strips the dll API from buffer, for example WEBKIT_API
+def strip_dll_api(buf):
+    pat = re.compile("[A-Z]*_API ")
+    buf = pat.sub("", buf)
+    return buf
+
+obj_name_pat = "[A-Z][a-z]*[A-Z][A-Za-z0-9]*"
+
+split_prefix_pat = re.compile('([A-Z]+[a-z]*)([A-Za-z0-9]+)')
+
+def find_obj_defs(buf, objdefs=[]):
+    """
+    Try to find object definitions in header files.
+    """
+
+    # filter out comments from buffer.
+    buf = strip_comments(buf)
+
+    # filter out dll api
+    buf = strip_dll_api(buf)
+
+    maybeobjdefs = []  # contains all possible objects from file
+
+    # first find all structures that look like they may represent a GtkObject
+    pat = re.compile("struct\s+_(" + obj_name_pat + ")\s*{\s*" +
+                     "(" + obj_name_pat + ")\s+", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        maybeobjdefs.append((m.group(1), m.group(2)))
+        pos = m.end()
+
+    # handle typedef struct { ... } style struct defs.
+    pat = re.compile("typedef struct\s+[_\w]*\s*{\s*" +
+                     "(" + obj_name_pat + ")\s+[^}]*}\s*" +
+                     "(" + obj_name_pat + ")\s*;", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        maybeobjdefs.append((m.group(2), m.group(1)))
+        pos = m.end()
+
+    # now find all structures that look like they might represent a class:
+    pat = re.compile("struct\s+_(" + obj_name_pat + ")Class\s*{\s*" +
+                     "(" + obj_name_pat + ")Class\s+", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        t = (m.group(1), m.group(2))
+        # if we find an object structure together with a corresponding
+        # class structure, then we have probably found a GtkObject subclass.
+        if t in maybeobjdefs:
+            objdefs.append(t)
+        pos = m.end()
+
+    pat = re.compile("typedef struct\s+[_\w]*\s*{\s*" +
+                     "(" + obj_name_pat + ")Class\s+[^}]*}\s*" +
+                     "(" + obj_name_pat + ")Class\s*;", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        t = (m.group(2), m.group(1))
+        # if we find an object structure together with a corresponding
+        # class structure, then we have probably found a GtkObject subclass.
+        if t in maybeobjdefs:
+            objdefs.append(t)
+        pos = m.end()
+
+    # now find all structures that look like they might represent
+    # a class inherited from GTypeInterface:
+    pat = re.compile("struct\s+_(" + obj_name_pat + ")Class\s*{\s*" +
+                     "GTypeInterface\s+", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        t = (m.group(1), '')
+        t2 = (m.group(1)+'Class', 'GTypeInterface')
+        # if we find an object structure together with a corresponding
+        # class structure, then we have probably found a GtkObject subclass.
+        if t2 in maybeobjdefs:
+            objdefs.append(t)
+        pos = m.end()
+
+    # now find all structures that look like they might represent
+    # an Iface inherited from GTypeInterface:
+    pat = re.compile("struct\s+_(" + obj_name_pat + ")Iface\s*{\s*" +
+                     "GTypeInterface\s+", re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = pat.search(buf, pos)
+        if not m: break
+        t = (m.group(1), '')
+        t2 = (m.group(1)+'Iface', 'GTypeInterface')
+        # if we find an object structure together with a corresponding
+        # class structure, then we have probably found a GtkObject subclass.
+        if t2 in maybeobjdefs:
+            objdefs.append(t)
+        pos = m.end()
+
+def sort_obj_defs(objdefs):
+    objdefs.sort()  # not strictly needed, but looks nice
+    pos = 0
+    while pos < len(objdefs):
+        klass,parent = objdefs[pos]
+        for i in range(pos+1, len(objdefs)):
+            # parent below subclass ... reorder
+            if objdefs[i][0] == parent:
+                objdefs.insert(i+1, objdefs[pos])
+                del objdefs[pos]
+                break
+        else:
+            pos = pos + 1
+    return objdefs
+
+# ------------------ Find enum definitions -----------------
+
+def find_enum_defs(buf, enums=[]):
+    # strip comments
+    # bulk comments
+    buf = strip_comments(buf)
+
+    # strip dll api macros
+    buf = strip_dll_api(buf)
+
+    # strip # directives
+    pat = re.compile(r"""^[#].*?$""", re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    buf = re.sub('\n', ' ', buf)
+
+    enum_pat = re.compile(r'enum\s*{([^}]*)}\s*([A-Z][A-Za-z]*)(\s|;)')
+    splitter = re.compile(r'\s*,\s', re.MULTILINE)
+    pos = 0
+    while pos < len(buf):
+        m = enum_pat.search(buf, pos)
+        if not m: break
+
+        name = m.group(2)
+        vals = m.group(1)
+        isflags = string.find(vals, '<<') >= 0
+        entries = []
+        for val in splitter.split(vals):
+            if not string.strip(val): continue
+            entries.append(string.split(val)[0])
+        if name != 'GdkCursorType':
+            enums.append((name, isflags, entries))
+
+        pos = m.end()
+
+# ------------------ Find function definitions -----------------
+
+def clean_func(buf):
+    """
+    Ideally would make buf have a single prototype on each line.
+    Actually just cuts out a good deal of junk, but leaves lines
+    where a regex can figure prototypes out.
+    """
+    # bulk comments
+    buf = strip_comments(buf)
+
+    # dll api
+    buf = strip_dll_api(buf)
+
+    # compact continued lines
+    pat = re.compile(r"""\\\n""", re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    # Preprocess directives
+    pat = re.compile(r"""^[#].*?$""", re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    #typedefs, stucts, and enums
+    pat = re.compile(r"""^(typedef|struct|enum)(\s|.|\n)*?;\s*""",
+                     re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    #strip DECLS macros
+    pat = re.compile(r"""G_BEGIN_DECLS|BEGIN_LIBGTOP_DECLS""", re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    #extern "C"
+    pat = re.compile(r"""^\s*(extern)\s+\"C\"\s+{""", re.MULTILINE)
+    buf = pat.sub('', buf)
+
+    #multiple whitespace
+    pat = re.compile(r"""\s+""", re.MULTILINE)
+    buf = pat.sub(' ', buf)
+
+    #clean up line ends
+    pat = re.compile(r""";\s*""", re.MULTILINE)
+    buf = pat.sub('\n', buf)
+    buf = buf.lstrip()
+
+    #associate *, &, and [] with type instead of variable
+    #pat = re.compile(r'\s+([*|&]+)\s*(\w+)')
+    pat = re.compile(r' \s* ([*|&]+) \s* (\w+)', re.VERBOSE)
+    buf = pat.sub(r'\1 \2', buf)
+    pat = re.compile(r'\s+ (\w+) \[ \s* \]', re.VERBOSE)
+    buf = pat.sub(r'[] \1', buf)
+
+    # make return types that are const work.
+    buf = re.sub(r'\s*\*\s*G_CONST_RETURN\s*\*\s*', '** ', buf)
+    buf = string.replace(buf, 'G_CONST_RETURN ', 'const-')
+    buf = string.replace(buf, 'const ', 'const-')
+
+    #strip GSEAL macros from the middle of function declarations:
+    pat = re.compile(r"""GSEAL""", re.VERBOSE)
+    buf = pat.sub('', buf)
+
+    return buf
+
+proto_pat=re.compile(r"""
+(?P<ret>(-|\w|\&|\*)+\s*)  # return type
+\s+                   # skip whitespace
+(?P<func>\w+)\s*[(]   # match the function name until the opening (
+\s*(?P<args>.*?)\s*[)]     # group the function arguments
+""", re.IGNORECASE|re.VERBOSE)
+#"""
+arg_split_pat = re.compile("\s*,\s*")
+
+get_type_pat = re.compile(r'(const-)?([A-Za-z0-9]+)\*?\s+')
+pointer_pat = re.compile('.*\*$')
+func_new_pat = re.compile('(\w+)_new$')
+
+class DefsWriter:
+    def __init__(self, fp=None, prefix=None, ns=None, verbose=False,
+                 defsfilter=None):
+        if not fp:
+            fp = sys.stdout
+
+        self.fp = fp
+        self.prefix = prefix
+        self.namespace = ns
+        self.verbose = verbose
+
+        self._enums = {}
+        self._objects = {}
+        self._functions = {}
+        if defsfilter:
+            filter = defsparser.DefsParser(defsfilter)
+            filter.startParsing()
+            for func in filter.functions + filter.methods.values():
+                self._functions[func.c_name] = func
+            for obj in filter.objects + filter.boxes + filter.interfaces:
+                self._objects[obj.c_name] = obj
+            for obj in filter.enums:
+                self._enums[obj.c_name] = obj
+
+    def write_def(self, deffile):
+        buf = open(deffile).read()
+
+        self.fp.write('\n;; From %s\n\n' % os.path.basename(deffile))
+        self._define_func(buf)
+        self.fp.write('\n')
+
+    def write_enum_defs(self, enums, fp=None):
+        if not fp:
+            fp = self.fp
+
+        fp.write(';; Enumerations and flags ...\n\n')
+        trans = string.maketrans(string.uppercase + '_',
+                                 string.lowercase + '-')
+        filter = self._enums
+        for cname, isflags, entries in enums:
+            if filter:
+                if cname in filter:
+                    continue
+            name = cname
+            module = None
+            if self.namespace:
+                module = self.namespace
+                name = cname[len(self.namespace):]
+            else:
+                m = split_prefix_pat.match(cname)
+                if m:
+                    module = m.group(1)
+                    name = m.group(2)
+            if isflags:
+                fp.write('(define-flags ' + name + '\n')
+            else:
+                fp.write('(define-enum ' + name + '\n')
+            if module:
+                fp.write('  (in-module "' + module + '")\n')
+            fp.write('  (c-name "' + cname + '")\n')
+            fp.write('  (gtype-id "' + typecode(cname, self.namespace) + '")\n')
+            prefix = entries[0]
+            for ent in entries:
+                # shorten prefix til we get a match ...
+                # and handle GDK_FONT_FONT, GDK_FONT_FONTSET case
+                while ((len(prefix) and prefix[-1] != '_') or ent[:len(prefix)] != prefix
+                       or len(prefix) >= len(ent)):
+                    prefix = prefix[:-1]
+            prefix_len = len(prefix)
+            fp.write('  (values\n')
+            for ent in entries:
+                fp.write('    \'("%s" "%s")\n' %
+                         (string.translate(ent[prefix_len:], trans), ent))
+            fp.write('  )\n')
+            fp.write(')\n\n')
+
+    def write_obj_defs(self, objdefs, fp=None):
+        if not fp:
+            fp = self.fp
+
+        fp.write(';; -*- scheme -*-\n')
+        fp.write('; object definitions ...\n')
+
+        filter = self._objects
+        for klass, parent in objdefs:
+            if filter:
+                if klass in filter:
+                    continue
+            if self.namespace:
+                cname = klass[len(self.namespace):]
+                cmodule = self.namespace
+            else:
+                m = split_prefix_pat.match(klass)
+                cname = klass
+                cmodule = None
+                if m:
+                    cmodule = m.group(1)
+                    cname = m.group(2)
+            fp.write('(define-object ' + cname + '\n')
+            if cmodule:
+                fp.write('  (in-module "' + cmodule + '")\n')
+            if parent:
+                fp.write('  (parent "' + parent + '")\n')
+            fp.write('  (c-name "' + klass + '")\n')
+            fp.write('  (gtype-id "' + typecode(klass, self.namespace) + '")\n')
+            # should do something about accessible fields
+            fp.write(')\n\n')
+
+    def _define_func(self, buf):
+        buf = clean_func(buf)
+        buf = string.split(buf,'\n')
+        filter = self._functions
+        for p in buf:
+            if not p:
+                continue
+            m = proto_pat.match(p)
+            if m == None:
+                if self.verbose:
+                    sys.stderr.write('No match:|%s|\n' % p)
+                continue
+            func = m.group('func')
+            if func[0] == '_':
+                continue
+            if filter:
+                if func in filter:
+                    continue
+            ret = m.group('ret')
+            args = m.group('args')
+            args = arg_split_pat.split(args)
+            for i in range(len(args)):
+                spaces = string.count(args[i], ' ')
+                if spaces > 1:
+                    args[i] = string.replace(args[i], ' ', '-', spaces - 1)
+
+            self._write_func(func, ret, args)
+
+    def _write_func(self, name, ret, args):
+        if len(args) >= 1:
+            # methods must have at least one argument
+            munged_name = name.replace('_', '')
+            m = get_type_pat.match(args[0])
+            if m:
+                obj = m.group(2)
+                if munged_name[:len(obj)] == obj.lower():
+                    self._write_method(obj, name, ret, args)
+                    return
+
+        if self.prefix:
+            l = len(self.prefix)
+            if name[:l] == self.prefix and name[l] == '_':
+                fname = name[l+1:]
+            else:
+                fname = name
+        else:
+            fname = name
+
+        # it is either a constructor or normal function
+        self.fp.write('(define-function ' + fname + '\n')
+        self.fp.write('  (c-name "' + name + '")\n')
+
+        # Hmmm... Let's asume that a constructor function name
+        # ends with '_new' and it returns a pointer.
+        m = func_new_pat.match(name)
+        if pointer_pat.match(ret) and m:
+            cname = ''
+            for s in m.group(1).split ('_'):
+                cname += s.title()
+            if cname != '':
+                self.fp.write('  (is-constructor-of "' + cname + '")\n')
+
+        self._write_return(ret)
+        self._write_arguments(args)
+
+    def _write_method(self, obj, name, ret, args):
+        regex = string.join(map(lambda x: x+'_?', string.lower(obj)),'')
+        mname = re.sub(regex, '', name, 1)
+        if self.prefix:
+            l = len(self.prefix) + 1
+            if mname[:l] == self.prefix and mname[l+1] == '_':
+                mname = mname[l+1:]
+        self.fp.write('(define-method ' + mname + '\n')
+        self.fp.write('  (of-object "' + obj + '")\n')
+        self.fp.write('  (c-name "' + name + '")\n')
+        self._write_return(ret)
+        self._write_arguments(args[1:])
+
+    def _write_return(self, ret):
+        if ret != 'void':
+            self.fp.write('  (return-type "' + ret + '")\n')
+        else:
+            self.fp.write('  (return-type "none")\n')
+
+    def _write_arguments(self, args):
+        is_varargs = 0
+        has_args = len(args) > 0
+        for arg in args:
+            if arg == '...':
+                is_varargs = 1
+            elif arg in ('void', 'void '):
+                has_args = 0
+        if has_args:
+            self.fp.write('  (parameters\n')
+            for arg in args:
+                if arg != '...':
+                    tupleArg = tuple(string.split(arg))
+                    if len(tupleArg) == 2:
+                        self.fp.write('    \'("%s" "%s")\n' % tupleArg)
+            self.fp.write('  )\n')
+        if is_varargs:
+            self.fp.write('  (varargs #t)\n')
+        self.fp.write(')\n\n')
+
+# ------------------ Main function -----------------
+
+def main(args):
+    verbose = False
+    onlyenums = False
+    onlyobjdefs = False
+    separate = False
+    modulename = None
+    namespace = None
+    defsfilter = None
+    opts, args = getopt.getopt(args[1:], 'vs:m:n:f:',
+                               ['onlyenums', 'onlyobjdefs',
+                                'modulename=', 'namespace=',
+                                'separate=', 'defsfilter='])
+    for o, v in opts:
+        if o == '-v':
+            verbose = True
+        if o == '--onlyenums':
+            onlyenums = True
+        if o == '--onlyobjdefs':
+            onlyobjdefs = True
+        if o in ('-s', '--separate'):
+            separate = v
+        if o in ('-m', '--modulename'):
+            modulename = v
+        if o in ('-n', '--namespace'):
+            namespace = v
+        if o in ('-f', '--defsfilter'):
+            defsfilter = v
+
+    if not args[0:1]:
+        print 'Must specify at least one input file name'
+        return -1
+
+    # read all the object definitions in
+    objdefs = []
+    enums = []
+    for filename in args:
+        buf = open(filename).read()
+        find_obj_defs(buf, objdefs)
+        find_enum_defs(buf, enums)
+    objdefs = sort_obj_defs(objdefs)
+
+    if separate:
+        methods = file(separate + '.defs', 'w')
+        types = file(separate + '-types.defs', 'w')
+
+        dw = DefsWriter(methods, prefix=modulename, ns=namespace,
+                        verbose=verbose, defsfilter=defsfilter)
+        dw.write_obj_defs(objdefs, types)
+        dw.write_enum_defs(enums, types)
+        print "Wrote %s-types.defs" % separate
+
+        for filename in args:
+            dw.write_def(filename)
+        print "Wrote %s.defs" % separate
+    else:
+        dw = DefsWriter(prefix=modulename, ns=namespace,
+                        verbose=verbose, defsfilter=defsfilter)
+
+        if onlyenums:
+            dw.write_enum_defs(enums)
+        elif onlyobjdefs:
+            dw.write_obj_defs(objdefs)
+        else:
+            dw.write_obj_defs(objdefs)
+            dw.write_enum_defs(enums)
+
+            for filename in args:
+                dw.write_def(filename)
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/tools/defs_gen/scmexpr.py b/tools/defs_gen/scmexpr.py
new file mode 100755
index 00000000..02f2e4bf
--- /dev/null
+++ b/tools/defs_gen/scmexpr.py
@@ -0,0 +1,143 @@
+#!/usr/bin/env python
+# -*- Mode: Python; py-indent-offset: 4 -*-
+from __future__ import generators
+
+import string
+from cStringIO import StringIO
+
+class error(Exception):
+    def __init__(self, filename, lineno, msg):
+        Exception.__init__(self, msg)
+        self.filename = filename
+        self.lineno = lineno
+        self.msg = msg
+    def __str__(self):
+        return '%s:%d: error: %s' % (self.filename, self.lineno, self.msg)
+
+trans = [' '] * 256
+for i in range(256):
+    if chr(i) in string.letters + string.digits + '_':
+        trans[i] = chr(i)
+    else:
+        trans[i] = '_'
+trans = string.join(trans, '')
+
+def parse(filename):
+    if isinstance(filename, str):
+        fp = open(filename, 'r')
+    else: # if not string, assume it is some kind of iterator
+        fp = filename
+        filename = getattr(fp, 'name', '<unknown>')
+    whitespace = ' \t\n\r\x0b\x0c'
+    nonsymbol = whitespace + '();\'"'
+    stack = []
+    openlines = []
+    lineno = 0
+    for line in fp:
+        pos = 0
+        lineno += 1
+        while pos < len(line):
+            if line[pos] in whitespace: # ignore whitespace
+                pass
+            elif line[pos] == ';': # comment
+                break
+            elif line[pos:pos+2] == "'(":
+                pass # the open parenthesis will be handled next iteration
+            elif line[pos] == '(':
+                stack.append(())
+                openlines.append(lineno)
+            elif line[pos] == ')':
+                if len(stack) == 0:
+                    raise error(filename, lineno, 'close parenthesis found when none open')
+                closed = stack[-1]
+                del stack[-1]
+                del openlines[-1]
+                if stack:
+                    stack[-1] += (closed,)
+                else:
+                    yield closed
+            elif line[pos] == '"': # quoted string
+                if not stack:
+                    raise error(filename, lineno,
+                                'string found outside of s-expression')
+                endpos = pos + 1
+                chars = []
+                while endpos < len(line):
+                    if endpos+1 < len(line) and line[endpos] == '\\':
+                        endpos += 1
+                        if line[endpos] == 'n':
+                            chars.append('\n')
+                        elif line[endpos] == 'r':
+                            chars.append('\r')
+                        elif line[endpos] == 't':
+                            chars.append('\t')
+                        else:
+                            chars.append('\\')
+                            chars.append(line[endpos])
+                    elif line[endpos] == '"':
+                        break
+                    else:
+                        chars.append(line[endpos])
+                    endpos += 1
+                if endpos >= len(line):
+                    raise error(filename, lineno, "unclosed quoted string")
+                pos = endpos
+                stack[-1] += (''.join(chars),)
+            else: # symbol/number
+                if not stack:
+                    raise error(filename, lineno,
+                                'identifier found outside of s-expression')
+                endpos = pos
+                while endpos < len(line) and line[endpos] not in nonsymbol:
+                    endpos += 1
+                symbol = line[pos:endpos]
+                pos = max(pos, endpos-1)
+                try: symbol = int(symbol)
+                except ValueError:
+                    try: symbol = float(symbol)
+                    except ValueError: pass
+                stack[-1] += (symbol,)
+            pos += 1
+    if len(stack) != 0:
+        msg = '%d unclosed parentheses found at end of ' \
+              'file (opened on line(s) %s)' % (len(stack),
+                                               ', '.join(map(str, openlines)))
+        raise error(filename, lineno, msg)
+
+class Parser:
+    def __init__(self, filename):
+        """Argument is either a string, a parse tree, or file object"""
+        self.filename = filename
+    def startParsing(self, filename=None):
+        statements = parse(filename or self.filename)
+        for statement in statements:
+            self.handle(statement)
+    def handle(self, tup):
+        cmd = string.translate(tup[0], trans)
+        if hasattr(self, cmd):
+            getattr(self, cmd)(*tup[1:])
+        else:
+            self.unknown(tup)
+    def unknown(self, tup):
+        pass
+
+_testString = """; a scheme file
+(define-func gdk_font_load    ; a comment at end of line
+  GdkFont
+  ((string name)))
+
+(define-boxed GdkEvent
+  gdk_event_copy
+  gdk_event_free
+  "sizeof(GdkEvent)")
+"""
+
+if __name__ == '__main__':
+    import sys
+    if sys.argv[1:]:
+        fp = open(sys.argv[1])
+    else:
+        fp = StringIO(_testString)
+    statements = parse(fp)
+    for s in statements:
+        print `s`
author	Murray Cumming <murrayc@murrayc.com>	2011-07-19 10:58:59 +0200
committer	Murray Cumming <murrayc@murrayc.com>	2011-07-19 11:01:08 +0200
commit	be04a6c229efb204f96dbf9b50359ffe1b5e7d17 (patch)
tree	2515d0ffd25f6d2dd277b810baa66646831ecdcd
parent	7a6f7eef1c58d51e870aabada452ce2cb84a8ea5 (diff)
download	glibmm-be04a6c229efb204f96dbf9b50359ffe1b5e7d17.tar.gz