diff options
| author | Lars Magne Ingebrigtsen <larsi@gnus.org> | 2010-09-10 18:44:35 +0200 |
|---|---|---|
| committer | Lars Magne Ingebrigtsen <larsi@gnus.org> | 2010-09-10 18:44:35 +0200 |
| commit | 381408e2192b8fd606babaa8c9a103186589d708 (patch) | |
| tree | 488a49b786d5cffcd0b068a527ec1ebe8339114a /src | |
| parent | 36f7d3666905e1447a2e80957735a1ade23c894c (diff) | |
| download | emacs-381408e2192b8fd606babaa8c9a103186589d708.tar.gz | |
Add support for the libxml2 library.
This adds the html-parse-string and xml-parse-string functions in the
new file src/xml.c, as well as autoconf detection of the library.
Diffstat (limited to 'src')
| -rw-r--r-- | src/ChangeLog | 10 | ||||
| -rw-r--r-- | src/Makefile.in | 10 | ||||
| -rw-r--r-- | src/config.in | 3 | ||||
| -rw-r--r-- | src/emacs.c | 4 | ||||
| -rw-r--r-- | src/lisp.h | 5 | ||||
| -rw-r--r-- | src/xml.c | 137 |
6 files changed, 166 insertions, 3 deletions
diff --git a/src/ChangeLog b/src/ChangeLog index 4b4f82aa4c8..9578130afd5 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,13 @@ +2010-09-09 Lars Magne Ingebrigtsen <larsi@gnus.org> + + * xml.c (Fxml_parse_buffer): New function to parse XML files. + +2010-09-08 Lars Magne Ingebrigtsen <larsi@gnus.org> + + * xml.c: New file. + (Fhtml_parse_buffer): New function to interface to the libxml2 + html parsing function. + 2010-09-05 Juanma Barranquero <lekktu@gmail.com> * biditype.h: Regenerate. diff --git a/src/Makefile.in b/src/Makefile.in index 9ee5631ef70..d91b95d86e3 100644 --- a/src/Makefile.in +++ b/src/Makefile.in @@ -226,6 +226,8 @@ RSVG_CFLAGS= @RSVG_CFLAGS@ IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@ IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@ +LIBXML2_LIBS = @LIBXML2_LIBS@ +LIBXML2_CFLAGS = @LIBXML2_CFLAGS@ ## widget.o if USE_X_TOOLKIT, otherwise empty. WIDGET_OBJ=@WIDGET_OBJ@ @@ -320,7 +322,8 @@ MKDEPDIR=@MKDEPDIR@ ## FIXME? MYCPPFLAGS only referenced in etc/DEBUG. ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \ ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \ - ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} ${DBUS_CFLAGS} \ + ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} \ + ${LIBXML2_CFLAGS} ${DBUS_CFLAGS} \ ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \ ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \ ${C_WARNINGS_SWITCH} ${CFLAGS} @@ -349,7 +352,7 @@ obj= dispnew.o frame.o scroll.o xdisp.o menu.o $(XMENU_OBJ) window.o \ syntax.o $(UNEXEC_OBJ) bytecode.o \ process.o callproc.o \ region-cache.o sound.o atimer.o \ - doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \ + doprnt.o strftime.o intervals.o textprop.o composite.o md5.o xml.o \ $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ) ## Object files used on some machine or other. @@ -595,7 +598,8 @@ SOME_MACHINE_LISP = ../lisp/mouse.elc \ ## duplicated symbols. If the standard libraries were compiled ## with GCC, we might need LIB_GCC again after them. LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \ - $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \ + $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) \ + ${LIBXML2_LIBS} $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \ $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \ $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \ $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC) diff --git a/src/config.in b/src/config.in index 604a737a8b0..199afbd78ba 100644 --- a/src/config.in +++ b/src/config.in @@ -813,6 +813,9 @@ along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ /* Define to 1 if you have the SM library (-lSM). */ #undef HAVE_X_SM +/* Define to 1 if you have the libxml2 library (-lxml2). */ +#undef HAVE_LIBXML2 + /* Define to 1 if you want to use the X window system. */ #undef HAVE_X_WINDOWS diff --git a/src/emacs.c b/src/emacs.c index 397d3d9ad27..33e0d60630b 100644 --- a/src/emacs.c +++ b/src/emacs.c @@ -1544,6 +1544,10 @@ main (int argc, char **argv) #endif #endif /* HAVE_X_WINDOWS */ +#ifdef HAVE_LIBXML2 + syms_of_xml (); +#endif + syms_of_menu (); #ifdef HAVE_NTGUI diff --git a/src/lisp.h b/src/lisp.h index 89514bf9ecb..fc9198a5ff7 100644 --- a/src/lisp.h +++ b/src/lisp.h @@ -3577,6 +3577,11 @@ extern char *x_get_keysym_name (int); EXFUN (Fmsdos_downcase_filename, 1); #endif +#ifdef HAVE_LIBXML2 +/* Defined in xml.c */ +extern void syms_of_xml (void); +#endif + #ifdef HAVE_MENUS /* Defined in (x|w32)fns.c, nsfns.m... */ extern int have_menus_p (void); diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 00000000000..92066067d73 --- /dev/null +++ b/src/xml.c @@ -0,0 +1,137 @@ +/* Interface to libxml2. + Copyright (C) 2010 Free Software Foundation, Inc. + +This file is part of GNU Emacs. + +GNU Emacs is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#ifdef HAVE_LIBXML2 + +#include <setjmp.h> +#include <libxml/tree.h> +#include <libxml/parser.h> +#include <libxml/HTMLparser.h> + +#include "lisp.h" +#include "buffer.h" + +Lisp_Object make_dom (xmlNode *node) +{ + if (node->type == XML_ELEMENT_NODE) { + Lisp_Object result = Fcons (intern (node->name), Qnil); + xmlNode *child; + xmlAttr *property; + + /* First add the attributes. */ + property = node->properties; + while (property != NULL) { + if (property->children && + property->children->content) { + char *pname = xmalloc (strlen (property->name) + 2); + *pname = ':'; + strcpy(pname + 1, property->name); + result = Fcons (Fcons (intern (pname), + build_string(property->children->content)), + result); + xfree (pname); + } + property = property->next; + } + /* Then add the children of the node. */ + child = node->children; + while (child != NULL) { + result = Fcons (make_dom (child), result); + child = child->next; + } + return Fnreverse (result); + } else if (node->type == XML_TEXT_NODE) { + Lisp_Object content = Qnil; + + if (node->content) + content = build_string (node->content); + + return Fcons (intern (node->name), content); + } else + return Qnil; +} + +static Lisp_Object +parse_buffer (Lisp_Object string, Lisp_Object base_url, int htmlp) +{ + xmlDoc *doc; + xmlNode *node; + Lisp_Object result; + int ibeg, iend; + char *burl = ""; + + LIBXML_TEST_VERSION; + + CHECK_STRING (string); + + if (! NILP (base_url)) { + CHECK_STRING (base_url); + burl = SDATA (base_url); + } + + if (htmlp) + doc = htmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8", + HTML_PARSE_RECOVER|HTML_PARSE_NONET| + HTML_PARSE_NOWARNING|HTML_PARSE_NOERROR); + else + doc = xmlReadMemory (SDATA (string), SBYTES (string), burl, "utf-8", + XML_PARSE_NONET|XML_PARSE_NOWARNING| + XML_PARSE_NOERROR); + + if (doc != NULL) { + node = xmlDocGetRootElement (doc); + if (node != NULL) + result = make_dom (node); + + xmlFreeDoc (doc); + xmlCleanupParser (); + } + + return result; +} + +DEFUN ("html-parse-string", Fhtml_parse_string, Shtml_parse_string, + 0, 2, 0, + doc: /* Parse the string as an HTML document and return the parse tree.*/) + (Lisp_Object string, Lisp_Object base_url) +{ + return parse_buffer (string, base_url, 1); +} + +DEFUN ("xml-parse-string", Fxml_parse_string, Sxml_parse_string, + 0, 2, 0, + doc: /* Parse the string as an XML document and return the parse tree.*/) + (Lisp_Object string, Lisp_Object base_url) +{ + return parse_buffer (string, base_url, 0); +} + + +/*********************************************************************** + Initialization + ***********************************************************************/ +void +syms_of_xml (void) +{ + defsubr (&Shtml_parse_string); + defsubr (&Sxml_parse_string); +} + +#endif /* HAVE_LIBXML2 */ |
