summary | refs | log | tree | commit | diff
path: root/libyelp/yelp-man-parser.c
diff options
context:
space:
mode:
author Rupert Swarbrick <rswarbrick@gmail.com> 2011-01-04 21:18:25 +0000
committer Shaun McCance <shaunm@gnome.org> 2011-01-10 09:33:39 -0500
commit 9f573bb2f548934a3a50537a0cd025664637781a (patch)
tree af81786d11f3306f10bf3909a74a9d8d89646633 /libyelp/yelp-man-parser.c
parent b7dbde5fe25c639294e66fb2c3ecb92f84ee5af1 (diff)
download yelp-9f573bb2f548934a3a50537a0cd025664637781a.tar.gz
Cleverer heuristic on what constitutes a man link.
Diffstat (limited to 'libyelp/yelp-man-parser.c')
-rw-r--r-- libyelp/yelp-man-parser.c 20
1 files changed, 16 insertions, 4 deletions
diff --git a/libyelp/yelp-man-parser.c b/libyelp/yelp-man-parser.c
index 680fc892..4001d38c 100644
--- a/libyelp/yelp-man-parser.c
+++ b/libyelp/yelp-man-parser.c
@@ -70,6 +70,8 @@ struct _YelpManParser {
gchar *buffer; /* The buffer, line at a time */
gsize length; /* The buffer length */
+ gchar *section; /* The name of the current section */
+
/* The width and height of a character according to troff. */
guint char_width;
guint char_height;
@@ -443,6 +445,7 @@ yelp_man_parser_free (YelpManParser *parser)
}
g_string_free (parser->accumulator, TRUE);
g_free (parser->title_str);
+ g_free (parser->section);
g_free (parser);
}
@@ -689,7 +692,7 @@ parse_text (YelpManParser *parser, GError **error)
g_string_truncate (parser->accumulator, 0);
g_free (text);
- g_free (section);
+ parser->section = section;
}
return TRUE;
@@ -1096,6 +1099,7 @@ cleanup_parsed_page (YelpManParser *parser)
*/
gchar *lastline;
GRegex *regex;
+ gchar regex_string [1024];
if (xmlChildElementCount (parser->section_node) == 1) {
lastline = (gchar *)xmlNodeGetContent (parser->section_node);
@@ -1122,10 +1126,18 @@ cleanup_parsed_page (YelpManParser *parser)
/* Next job: Go through and stick the links in. Text that looks
* like man(1) should be converted to a link to man:man(1) and
* urls should also be linkified.
+ *
+ * Unfortunately, it's not entirely clear what constitutes a valid
+ * section. All sections must be alphanumeric and the logic we use
+ * to avoid extra hits (eg "one or more widget(s)") is that either
+ * the section must start with a digit or (if the current section
+ * doesn't) must start with the same letter as the current
+ * section.
*/
- regex = g_regex_new ("([a-zA-Z0-9\\-_.]+)"
- "\\(([a-zA-Z0-9]{1,2})\\)",
- 0, 0, NULL);
+ snprintf (regex_string, 1024,
+ "([a-zA-Z0-9\\-_.:]+)\\(((%c|[0-9])[a-zA-Z0-9]*)\\)",
+ parser->section ? parser->section[0] : '0');
+ regex = g_regex_new (regex_string, 0, 0, NULL);
g_return_if_fail (regex);
fixup_links (parser, regex, man_link_inserter);
g_regex_unref (regex);