Add support for case conversion characters in regex substitutions.

Allow using Perl-like \l, \L, \u, \U and \E escape sequences in the substitution string used with %rename("%(regex:/pattern/subst/)s"). This is useful, e.g., for title-casing all strings after removing some prefix. Closes #82
author: Vadim Zeitlin <vz-swig@zeitlins.org> 2013-08-30 17:01:01 +0200
committer: William S Fulton <wsf@fultondesigns.co.uk> 2013-10-15 07:17:56 +0100
commit: 72afb74f470841d9af504a8f85617c6a70aa837d (patch)
tree: 7738345ee23ac538eb81deace43782261f2c5ca5
parent: 669a27bb7b9d2da9c1758a5126d1db1f56299f08 (diff)
download: swig-72afb74f470841d9af504a8f85617c6a70aa837d.tar.gz
7 files changed, 108 insertions, 21 deletions
diff --git a/CHANGES.current b/CHANGES.current
index 350571e7b..db0a9899f 100644
--- a/CHANGES.current
+++ b/CHANGES.current
@@ -5,6 +5,10 @@ See the RELEASENOTES file for a summary of changes in each release.
 Version 3.0.0 (in progress)
 ============================
 
+2013-10-15: vadz
+            Allow using \l, \L, \u, \U and \E in the substitution part of %(regex:/pattern/subst/)
+            inside %rename to change the case of the text being replaced.
+
 2013-10-12: wsfulton
             [Lua] Apply #92 - missing return statements for SWIG_Lua_add_namespace_details()
             and SWIG_Lua_namespace_register().
diff --git a/Doc/Manual/SWIG.html b/Doc/Manual/SWIG.html
index f9ea5b2ef..c0cde0172 100644
--- a/Doc/Manual/SWIG.html
+++ b/Doc/Manual/SWIG.html
@@ -1888,11 +1888,22 @@ and a more descriptive one, but the two functions are otherwise equivalent:
     <i>pattern</i> part is a regular expression in Perl syntax (as supported
     by the <a href="http://www.pcre.org/">Perl Compatible Regular Expressions (PCRE)</a>)
     library and the <i>subst</i> string
-    can contain back-references introduced by <tt>'\'</tt> or, as backslashes need
-    to be escaped in C strings, rather by <tt>"\\"</tt>. For example, to remove
-    any alphabetic prefix before an underscore you could use the following directive:
-    <tt>%rename("regex:/(\\w+)_(.*)/\\2/")</tt></td>
-    <td><tt>Prefix_Print</tt></td><td><tt>Print</tt></td>
+    can contain back-references of the form <tt>\N</tt> where <tt>N</tt> is a digit
+    from 0 to 9, or one of the following escape sequences: <tt>\l</tt>, <tt>\L</tt>,
+    <tt>\u</tt>, <tt>\U</tt> or <tt>\E</tt>. The back-references are replaced with the
+    contents of the corresponding capture group while the escape sequences perform the
+    case conversion in the substitution string: <tt>\l</tt> and <tt>\L</tt> convert to
+    lower case, while <tt>\u</tt> and <tt>\U</tt> convert to upper case. The
+    difference between the elements of each pair is that <tt>\l</tt> and <tt>\u</tt>
+    change the case of the next character only, while <tt>\L</tt> and <tt>\U</tt> do
+    it for all the remaining characters or until <tt>\E</tt> is encountered.
+
+    Finally, please note that backslashes need to be escaped in C strings, so in
+    practice <tt>"\\"</tt> must be used in all these escape sequences. For example,
+    to remove any alphabetic prefix before an underscore and capitalize the remaining
+    part you could use the following directive:
+    <tt>%rename("regex:/(\\w+)_(.*)/\\u\\2/")</tt></td>
+    <td><tt>prefix_print</tt></td><td><tt>Print</tt></td>
 </tr>
 <tr>
     <td><tt>command:cmd</tt></td>
diff --git a/Examples/test-suite/csharp/rename_pcre_encoder_runme.cs b/Examples/test-suite/csharp/rename_pcre_encoder_runme.cs
index f6289e7e2..c06fb1387 100644
--- a/Examples/test-suite/csharp/rename_pcre_encoder_runme.cs
+++ b/Examples/test-suite/csharp/rename_pcre_encoder_runme.cs
@@ -4,9 +4,12 @@ using rename_pcre_encoderNamespace;
 public class runme {
   static void Main() {
     SomeWidget w = new SomeWidget();
-    w.putBorderWidth(17);
-    if ( w.getBorderWidth() != 17 )
+    w.put_borderWidth(17);
+    if ( w.get_borderWidth() != 17 )
       throw new Exception(String.Format("Border with should be 17, not {0}",
-                                        w.getBorderWidth()));
+                                        w.get_borderWidth()));
+
+    if ( rename_pcre_encoder.StartINSAneAndUNSAvoryTraNSAtlanticRaNSAck() != 42 )
+      throw new Exception("Unexpected result of renamed function call");
   }
 }
diff --git a/Examples/test-suite/java/rename_pcre_encoder_runme.java b/Examples/test-suite/java/rename_pcre_encoder_runme.java
index cb843338b..20882e21a 100644
--- a/Examples/test-suite/java/rename_pcre_encoder_runme.java
+++ b/Examples/test-suite/java/rename_pcre_encoder_runme.java
@@ -6,9 +6,11 @@ public class rename_pcre_encoder_runme {
   public static void main(String argv[])
   {
     SomeWidget w = new SomeWidget();
-    w.putBorderWidth(17);
-    if ( w.getBorderWidth() != 17 )
+    w.put_borderWidth(17);
+    if ( w.get_borderWidth() != 17 )
       throw new RuntimeException(String.format("Border with should be 17, not %d",
-                                               w.getBorderWidth()));
+                                               w.get_borderWidth()));
+    if ( rename_pcre_encoder.StartINSAneAndUNSAvoryTraNSAtlanticRaNSAck() != 42 )
+      throw new RuntimeException("Unexpected result of renamed function call");
   }
 }
diff --git a/Examples/test-suite/python/rename_pcre_encoder_runme.py b/Examples/test-suite/python/rename_pcre_encoder_runme.py
index 1186703a0..419acd1a1 100644
--- a/Examples/test-suite/python/rename_pcre_encoder_runme.py
+++ b/Examples/test-suite/python/rename_pcre_encoder_runme.py
@@ -1,13 +1,16 @@
 from rename_pcre_encoder import *
 
 s = SomeWidget()
-s.putBorderWidth(3)
-if s.getBorderWidth() != 3:
-    raise RuntimeError("Border should be 3, not %d" % (s.getBorderWidth(),))
+s.put_borderWidth(3)
+if s.get_borderWidth() != 3:
+    raise RuntimeError("Border should be 3, not %d" % (s.get_borderWidth(),))
 
-s.putSize(4, 5)
+s.put_size(4, 5)
 a = AnotherWidget()
 a.DoSomething()
 
 evt = wxEVTSomeEvent()
 t   = xUnchangedName()
+
+if StartINSAneAndUNSAvoryTraNSAtlanticRaNSAck() != 42:
+    raise RuntimeError("Unexpected result of renamed function call")
diff --git a/Examples/test-suite/rename_pcre_encoder.i b/Examples/test-suite/rename_pcre_encoder.i
index 66f30c7bc..b29b2a056 100644
--- a/Examples/test-suite/rename_pcre_encoder.i
+++ b/Examples/test-suite/rename_pcre_encoder.i
@@ -3,9 +3,14 @@
 // strip the wx prefix from all identifiers except those starting with wxEVT
 %rename("%(regex:/wx(?!EVT)(.*)/\\1/)s") "";
 
-// Replace "Set" and "Get" prefixes with "put" and "get" respectively.
-%rename("%(regex:/^Set(.*)/put\\1/)s", %$isfunction) "";
-%rename("%(regex:/^Get(.*)/get\\1/)s", %$isfunction) "";
+// Change "{Set,Get}Foo" naming convention to "{put,get}_foo" one.
+%rename("%(regex:/^Set(.*)/put_\\l\\1/)s", %$isfunction) "";
+%rename("%(regex:/^Get(.*)/get_\\l\\1/)s", %$isfunction) "";
+
+// Make some words stand out (unfortunately we don't have "global" flag): we
+// use \U to capitalize the second capture group and then \E to preserve the
+// case of the rest.
+%rename("%(regex:/(.*?)(nsa)(.*?)\\2(.*?)\\2(.*?)\\2(.*)/\\1\\U\\2\\E\\3\\U\\2\\E\\4\\U\\2\\E\\5\\U\\2\\E\\6/)s") "";
 
 %inline %{
 
@@ -28,4 +33,6 @@ class wxEVTSomeEvent {
 class xUnchangedName {
 };
 
+inline int StartInsaneAndUnsavoryTransatlanticRansack() { return 42; }
+
 %}
diff --git a/Source/Swig/misc.c b/Source/Swig/misc.c
index 596f6b424..769882bf8 100644
--- a/Source/Swig/misc.c
+++ b/Source/Swig/misc.c
@@ -1180,8 +1180,38 @@ err_out:
   exit(1);
 }
 
+/* Append len characters from src to dst, converting their case if *convertCase is non-zero (1: to upper case, -1: to lower case). If convertNextOnly
+ * is non-zero, only the first character is converted and *convertCase is reset to 0; otherwise all len characters are converted. */
+static void copy_with_maybe_case_conversion(String *dst, const char *src, int len, int *convertCase, int convertNextOnly)
+{
+  int i, upper = (*convertCase == 1);
+
+  /* Nothing to copy: any pending conversion stays armed for the next call. */
+  if (len <= 0)
+    return;
+
+  if (!*convertCase) {
+    Write(dst, src, len);
+    return;
+  }
+
+  /* <ctype.h> functions need an unsigned char value (or EOF); passing a negative
+   * plain char (a byte >= 0x80 where char is signed) is undefined behaviour. */
+  for (i = 0; i < (convertNextOnly ? 1 : len); i++) {
+    unsigned char c = (unsigned char)src[i];
+    Putc(upper ? toupper(c) : tolower(c), dst);
+  }
+
+  if (convertNextOnly) {
+    *convertCase = 0; /* the one-shot conversion has been consumed */
+    if (len > 1)
+      Write(dst, src + 1, len - 1);
+  }
+}
+
 String *replace_captures(int num_captures, const char *input, String *subst, int captures[], String *pattern, String *s)
 {
+  int convertCase = 0, convertNextOnly = 0;
   String *result = NewStringEmpty();
   const char *p = Char(subst);
 
@@ -1189,10 +1219,10 @@ String *replace_captures(int num_captures, const char *input, String *subst, int
     /* Copy part without substitutions */
     const char *q = strchr(p, '\\');
     if (!q) {
-      Write(result, p, strlen(p));
+      copy_with_maybe_case_conversion(result, p, strlen(p), &convertCase, convertNextOnly);
       break;
     }
-    Write(result, p, q - p);
+    copy_with_maybe_case_conversion(result, p, q - p, &convertCase, convertNextOnly);
     p = q + 1;
 
     /* Handle substitution */
@@ -1203,12 +1233,39 @@ String *replace_captures(int num_captures, const char *input, String *subst, int
       if (group < num_captures) {
 	int l = captures[group*2], r = captures[group*2 + 1];
 	if (l != -1) {
-	  Write(result, input + l, r - l);
+	  copy_with_maybe_case_conversion(result, input + l, r - l, &convertCase, convertNextOnly);
 	}
       } else {
 	Swig_error("SWIG", Getline(s), "PCRE capture replacement failed while matching \"%s\" using \"%s\" - request for group %d is greater than the number of captures %d.\n",
 	    Char(pattern), input, group, num_captures-1);
       }
+    } else {
+	/* Handle Perl-like case conversion escapes. */
+	switch (*p) {
+	case 'u':
+	  convertCase = 1;
+	  convertNextOnly = 1;
+	  break;
+	case 'U':
+	  convertCase = 1;
+	  convertNextOnly = 0;
+	  break;
+	case 'l':
+	  convertCase = -1;
+	  convertNextOnly = 1;
+	  break;
+	case 'L':
+	  convertCase = -1;
+	  convertNextOnly = 0;
+	  break;
+	case 'E':
+	  convertCase = 0;
+	  break;
+	default:
+	  Swig_error("SWIG", Getline(s), "Unrecognized escape character '%c' in the replacement string \"%s\".\n",
+	      *p, Char(subst));
+	}
+	p++;
     }
   }
author	Vadim Zeitlin <vz-swig@zeitlins.org>	2013-08-30 17:01:01 +0200
committer	William S Fulton <wsf@fultondesigns.co.uk>	2013-10-15 07:17:56 +0100
commit	72afb74f470841d9af504a8f85617c6a70aa837d (patch)
tree	7738345ee23ac538eb81deace43782261f2c5ca5
parent	669a27bb7b9d2da9c1758a5126d1db1f56299f08 (diff)
download	swig-72afb74f470841d9af504a8f85617c6a70aa837d.tar.gz