maintain separate versions of C extension for Python2/3

--HG-- branch : python3 rename : source/str_util.c => source/str_util_python3.c
author: Ian Ward <ian@excess.org> 2011-01-11 13:43:14 -0500
committer: Ian Ward <ian@excess.org> 2011-01-11 13:43:14 -0500
commit: c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb (patch)
tree: 392992a2a91c4d738803f0071c54912c3d4d55f6 /source
parent: 16ad617f3aeb9fc98719f40a2e3a9dd43f4e3c4a (diff)
download: urwid-c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb.tar.gz
2 files changed, 894 insertions, 72 deletions
diff --git a/source/str_util.c b/source/str_util.c
index 76dc1ec..90b665f 100644
--- a/source/str_util.c
+++ b/source/str_util.c
@@ -176,8 +176,8 @@ text -- string text\n\
 pos -- position in text";
 
 
-static void Py_DecodeOne(const unsigned char *text, Py_ssize_t text_len,
-                         Py_ssize_t pos, Py_ssize_t *ret)
+static void Py_DecodeOne(const unsigned char *text, int text_len, int pos,
+                         int *ret)
 {
     int dummy;
     
@@ -309,17 +309,17 @@ static PyObject * decode_one(PyObject *self, PyObject *args)
 {
     PyObject *py_text;
     
-    Py_ssize_t pos, text_len;
+    int pos, text_len;
     char *text;
-    Py_ssize_t ret[2];
+    int ret[2];
     
-    if (!PyArg_ParseTuple(args, "On", &py_text, &pos))
+    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
         return NULL;
 
-    PyBytes_AsStringAndSize(py_text, &text, &text_len);
+    PyString_AsStringAndSize(py_text, &text, &text_len);
 
     Py_DecodeOne((unsigned char *)text, text_len, pos, ret);
-    return Py_BuildValue("(n, n)", ret[0], ret[1]);
+    return Py_BuildValue("(i, i)", ret[0], ret[1]);
 }
 
                                      
@@ -333,10 +333,10 @@ text -- text string \n\
 pos -- position in text";
 
 
-static void Py_DecodeOneRight(const unsigned char *text, Py_ssize_t text_len,
-                             Py_ssize_t pos, Py_ssize_t *ret)
+static void Py_DecodeOneRight(const unsigned char *text, int text_len, int pos,
+                             int *ret)
 {
-    Py_ssize_t subret[2];
+    int subret[2];
     
     while (pos >= 0)
     {
@@ -364,14 +364,14 @@ static PyObject * decode_one_right(PyObject *self, PyObject *args)
     
     PyObject *py_text;
 
-    Py_ssize_t pos, text_len;
+    int pos, text_len;
     char *text;
-    Py_ssize_t ret[2] = {'?',0};
+    int ret[2] = {'?',0};
     
-    if (!PyArg_ParseTuple(args, "On", &py_text, &pos))
+    if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos))
         return NULL;
 
-    PyBytes_AsStringAndSize(py_text, &text, &text_len);
+    PyString_AsStringAndSize(py_text, &text, &text_len);
 
     Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret);
     return Py_BuildValue("(i, i)", ret[0], ret[1]);
@@ -391,10 +391,10 @@ Return values:\n\
 2 -- pos is on the 2nd half of a dbe char";
 
 
-static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start,
-                               Py_ssize_t pos)
+static int Py_WithinDoubleByte(const unsigned char *str, int line_start,
+                               int pos)
 {
-    Py_ssize_t i;
+    int i;
 
     if ((str[pos] >= 0x40) && (str[pos] < 0x7f))
     {
@@ -421,14 +421,14 @@ static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start,
 static PyObject * within_double_byte(PyObject *self, PyObject *args)
 {
     const unsigned char *str;
-    Py_ssize_t line_start, pos;
-    Py_ssize_t ret;
+    int line_start, pos;
+    int ret;
     
-    if (!PyArg_ParseTuple(args, "snn", &str, &line_start, &pos))
+    if (!PyArg_ParseTuple(args, "sii", &str, &line_start, &pos))
         return NULL;
 
     ret = Py_WithinDoubleByte(str, line_start, pos);
-    return Py_BuildValue("n", ret);
+    return Py_BuildValue("i", ret);
 }
 
 
@@ -439,11 +439,11 @@ Test if the character at offs within text is wide.\n\n\
 text -- string or unicode text\n\
 offs -- offset";
 
-static int Py_IsWideChar(PyObject *text, Py_ssize_t offs)
+static int Py_IsWideChar(PyObject *text, int offs)
 {
     const unsigned char *str;
     Py_UNICODE *ustr;
-    Py_ssize_t ret[2], str_len;
+    int ret[2], str_len;
         
     if (PyUnicode_Check(text))  //text_py is unicode string
     {
@@ -458,8 +458,8 @@ static int Py_IsWideChar(PyObject *text, Py_ssize_t offs)
         return -1;
     }
     
-    str = (const unsigned char *)PyBytes_AsString(text);
-    str_len = (int) PyBytes_Size(text);
+    str = (const unsigned char *)PyString_AsString(text);
+    str_len = (int) PyString_Size(text);
 
     if (byte_encoding == ENC_UTF8)
     {
@@ -477,10 +477,10 @@ static int Py_IsWideChar(PyObject *text, Py_ssize_t offs)
 static PyObject * is_wide_char(PyObject *self, PyObject *args)
 {
     PyObject *text;
-    Py_ssize_t offs;
+    int offs;
     int ret;
     
-    if (!PyArg_ParseTuple(args, "On", &text, &offs))
+    if (!PyArg_ParseTuple(args, "Oi", &text, &offs))
         return NULL;
 
     ret = Py_IsWideChar(text, offs);
@@ -501,16 +501,16 @@ start_offs -- start offset\n\
 end_offs -- end offset";
 
 
-static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs,
-                           Py_ssize_t end_offs)
+static int Py_MovePrevChar(PyObject *text, int start_offs,
+                           int end_offs)
 {
-    Py_ssize_t position;
+    int position;
     unsigned char *str;
     
     if (PyUnicode_Check(text))  //text_py is unicode string
         return end_offs-1;
     else
-        str = (unsigned char *)PyBytes_AsString(text);
+        str = (unsigned char *)PyString_AsString(text);
     
     if (byte_encoding == ENC_UTF8) //encoding is utf8
     {
@@ -530,14 +530,14 @@ static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs,
 static PyObject * move_prev_char(PyObject *self, PyObject *args)
 {
     PyObject *text;
-    Py_ssize_t start_offs, end_offs;
-    Py_ssize_t ret;
+    int start_offs, end_offs;
+    int ret;
 
-    if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs))
+    if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs))
         return NULL; 
 
     ret = Py_MovePrevChar(text, start_offs, end_offs);
-    return Py_BuildValue("n", ret);
+    return Py_BuildValue("i", ret);
 }
 
 
@@ -550,16 +550,16 @@ start_offs -- start offset\n\
 end_offs -- end offset";
 
 
-static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs,
-                           Py_ssize_t end_offs)
+static int Py_MoveNextChar(PyObject *text, int start_offs,
+                           int end_offs)
 {
-    Py_ssize_t position;
+    int position;
     unsigned char * str;
 
     if (PyUnicode_Check(text))  //text_py is unicode string
         return start_offs+1;
     else
-        str = (unsigned char *)PyBytes_AsString(text);
+        str = (unsigned char *)PyString_AsString(text);
     
     if (byte_encoding == ENC_UTF8) //encoding is utf8
     {
@@ -580,14 +580,14 @@ static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs,
 static PyObject * move_next_char(PyObject *self, PyObject *args)
 {
     PyObject *text;
-    Py_ssize_t start_offs, end_offs;
-    Py_ssize_t ret;
+    int start_offs, end_offs;
+    int ret;
 
-    if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs))
+    if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs))
         return NULL; 
 
     ret = Py_MoveNextChar(text, start_offs, end_offs);
-    return Py_BuildValue("n", ret);
+    return Py_BuildValue("i", ret);
 }
 
 
@@ -600,12 +600,10 @@ start_offs -- start offset\n\
 end_offs -- end offset";
 
 
-static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs,
-                        Py_ssize_t end_offs)
+static int Py_CalcWidth(PyObject *text, int start_offs, int end_offs)
 {
     unsigned char * str;
-    Py_ssize_t i, ret[2], str_len;
-    int screencols;
+    int i, screencols, ret[2], str_len;
     Py_UNICODE *ustr;
 
     if (PyUnicode_Check(text))  //text_py is unicode string
@@ -619,14 +617,14 @@ static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs,
         return screencols;
     }
 
-    if (!PyBytes_Check(text))
+    if (!PyString_Check(text))
     {
         PyErr_SetString(PyExc_TypeError, "Neither unicode nor string.");
         return -1;
     }
 
-    str = (unsigned char *)PyBytes_AsString(text);
-    str_len = PyBytes_Size(text);
+    str = (unsigned char *)PyString_AsString(text);
+    str_len = (int) PyString_Size(text);
 
     if (byte_encoding == ENC_UTF8)
     {
@@ -677,12 +675,11 @@ end_offs -- end offset\n\
 pref_col -- preferred column";
 
 
-static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs,
-                          Py_ssize_t end_offs, int pref_col, Py_ssize_t *ret)
+static int Py_CalcTextPos(PyObject *text, int start_offs, int end_offs,
+                          int pref_col, int *ret)
 {
     unsigned char * str;
-    Py_ssize_t i, dummy[2], str_len;
-    int screencols, width;
+    int i, screencols, dummy[2], str_len, width;
     Py_UNICODE *ustr;
 
     if (PyUnicode_Check(text))  //text_py is unicode string
@@ -709,14 +706,14 @@ static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs,
         return 0;
     }
 
-    if (!PyBytes_Check(text))
+    if (!PyString_Check(text))
     {
         PyErr_SetString(PyExc_TypeError, "Neither unicode nor string.");
         return -1;
     }
 
-    str = (unsigned char *)PyBytes_AsString(text);
-    str_len = PyBytes_Size(text);
+    str = (unsigned char *)PyString_AsString(text);
+    str_len = (int) PyString_Size(text);
     
     if (byte_encoding == ENC_UTF8)
     {
@@ -767,10 +764,10 @@ static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs,
 static PyObject * calc_text_pos(PyObject *self, PyObject *args)
 {
     PyObject *text;
-    Py_ssize_t start_offs, end_offs, ret[2];
-    int pref_col, err;
+    int start_offs, end_offs, pref_col;
+    int ret[2], err;
 
-    if (!PyArg_ParseTuple(args, "Onni", &text, &start_offs, &end_offs,
+    if (!PyArg_ParseTuple(args, "Oiii", &text, &start_offs, &end_offs,
                           &pref_col))
         return NULL; 
 
@@ -778,7 +775,7 @@ static PyObject * calc_text_pos(PyObject *self, PyObject *args)
     if (err==-1) //an error occured
         return NULL;
                       
-    return Py_BuildValue("(nn)", ret[0], ret[1]);
+    return Py_BuildValue("(ii)", ret[0], ret[1]);
 }
 
 
@@ -802,21 +799,14 @@ static PyMethodDef Str_UtilMethods[] = {
     {NULL, NULL, 0, NULL}        // Sentinel 
 };
 
-static struct PyModuleDef Str_UtilModule = {
-    PyModuleDef_HEAD_INIT,
-    "str_util",
-    NULL,
-    -1,
-    Str_UtilMethods
-};
 
-PyMODINIT_FUNC PyInit_str_util(void)
+PyMODINIT_FUNC initstr_util(void)
 {
-    return PyModule_Create(&Str_UtilModule);
+    Py_InitModule("str_util", Str_UtilMethods);
 }
 
 
-/*
+
 int main(int argc, char *argv[])
 {
     //Pass argv[0] to the Python interpreter:
@@ -830,4 +820,3 @@ int main(int argc, char *argv[])
 
     return 0;
 }
-*/
diff --git a/source/str_util_python3.c b/source/str_util_python3.c
new file mode 100644
index 0000000..76dc1ec
--- /dev/null
+++ b/source/str_util_python3.c
@@ -0,0 +1,833 @@
+/*  Urwid unicode character processing tables
+
+    Copyright (C) 2006 Rebecca Breu.
+    This file contains rewritten code of utable.py by Ian Ward.
+
+    This library is free software; you can redistribute it and/or
+    modify it under the terms of the GNU Lesser General Public
+    License as published by the Free Software Foundation; either
+    version 2.1 of the License, or (at your option) any later version.
+
+    This library is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+    Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public
+    License along with this library; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+    Urwid web site: http://excess.org/urwid/
+*/
+
+
+#include <Python.h>
+
+#define ENC_UTF8 1
+#define ENC_WIDE 2
+#define ENC_NARROW 3
+
+
+static int widths_len = 2*38;   // number of entries in widths[]: 38 pairs
+static const long int widths[] = {  // (upper bound, width) pairs, ascending bounds
+    126, 1,
+    159, 0,
+    687, 1,
+    710, 0,
+    711, 1,
+    727, 0,
+    733, 1,
+    879, 0,
+    1154, 1,
+    1161, 0,
+    4347, 1,
+    4447, 2,
+    7467, 1,
+    7521, 0,
+    8369, 1,
+    8426, 0,
+    9000, 1,
+    9002, 2,
+    11021, 1,
+    12350, 2,
+    12351, 1,
+    12438, 2,
+    12442, 0,
+    19893, 2,
+    19967, 1,
+    55203, 2,
+    63743, 1,
+    64106, 2,
+    65039, 1,
+    65059, 0,
+    65131, 2,
+    65279, 1,
+    65376, 2,
+    65500, 1,
+    65510, 2,
+    120831, 1,
+    262141, 2,
+    1114109, 1
+};
+
+
+static short byte_encoding = ENC_UTF8;  // one of ENC_*; changed by set_byte_encoding()
+
+
+static PyObject * to_bool(int val)
+{   // map a C truth value to Py_True / Py_False via the Py_RETURN_* macros
+    if (!val)  Py_RETURN_FALSE;
+    Py_RETURN_TRUE;
+}
+
+
+//======================================================================
+static char get_byte_encoding_doc[] =
+"get_byte_encoding() -> string encoding\n\n\
+Get byte encoding ('utf8', 'wide', or 'narrow').";
+
+static PyObject * get_byte_encoding(PyObject *self, PyObject *args)
+{   // Python entry point: report the current byte_encoding as a string
+    if (!PyArg_ParseTuple(args, ""))    // accepts no arguments
+        return NULL;
+
+    if (byte_encoding == ENC_UTF8)
+        return Py_BuildValue("s", "utf8");
+    if (byte_encoding == ENC_WIDE)
+        return Py_BuildValue("s", "wide");
+    if (byte_encoding == ENC_NARROW)
+        return Py_BuildValue("s", "narrow");
+    Py_RETURN_NONE; // should never happen; None must be returned as an owned reference
+}
+
+
+//======================================================================
+static char set_byte_encoding_doc[] =
+"set_byte_encoding(string encoding) -> None\n\n\
+Set byte encoding. \n\n\
+encoding -- one of 'utf8', 'wide', 'narrow'";
+
+static PyObject * set_byte_encoding(PyObject *self, PyObject *args)
+{
+    const char *enc;
+    // Python entry point: select the encoding used by the byte-string helpers.
+    if (!PyArg_ParseTuple(args, "s", &enc))
+        return NULL;
+
+    if (strcmp(enc, "utf8") == 0)
+        byte_encoding = ENC_UTF8;
+    else if (strcmp(enc, "wide") == 0)
+        byte_encoding = ENC_WIDE;
+    else if (strcmp(enc, "narrow") == 0)
+        byte_encoding = ENC_NARROW;
+    else
+    {
+        // got wrong encoding
+        PyErr_SetString(PyExc_ValueError, "Unknown encoding.");
+        return NULL;
+    }
+    // None must be returned as an owned reference, not the bare singleton
+    Py_RETURN_NONE;
+}
+
+
+//======================================================================
+static char get_width_doc[] =
+"get_width(int ord) -> int width\n\n\
+Return the screen column width for unicode ordinal ord.\n\n\
+ord -- ordinal";
+
+
+static int Py_GetWidth(long int ord)
+{
+    int idx = 0;
+    // ordinals 0xe and 0xf are given zero width without consulting the table
+    if (ord == 0xe || ord == 0xf)
+        return 0;
+    // widths[] is a list of (upper bound, width) pairs: first bound >= ord wins
+    while (idx < widths_len)
+    {
+        if (widths[idx] >= ord)
+            return widths[idx+1];
+        idx += 2;
+    }
+    return 1;   // ord is above every bound in the table
+}
+
+
+static PyObject * get_width(PyObject *self, PyObject *args)
+{
+    // Python entry point: thin wrapper around Py_GetWidth
+    long int codepoint;
+
+    if (PyArg_ParseTuple(args, "l", &codepoint) == 0)
+    {
+        return NULL;    // argument error already raised
+    }
+    return Py_BuildValue("i", Py_GetWidth(codepoint));
+}
+
+
+//======================================================================
+static char decode_one_doc[] =
+"decode_one(string text, int pos) -> (int ord, int nextpos)\n\n\
+Return (ordinal at pos, next position) for UTF-8 encoded text.\n\n\
+text -- string text\n\
+pos -- position in text";
+
+
+static void Py_DecodeOne(const unsigned char *text, Py_ssize_t text_len,
+                         Py_ssize_t pos, Py_ssize_t *ret)
+{   // ret[0] = ordinal decoded at text[pos] ('?' on error), ret[1] = next pos
+    int dummy;
+    
+    if (pos < 0 || pos >= text_len) //error: no byte to read at pos
+    {
+        ret[0] = '?';
+        ret[1] = pos+1;
+        return;
+    }
+
+    if (!(text[pos]&0x80))  // 0xxxxxxx: single byte
+    {
+        ret[0] = text[pos];
+        ret[1] = pos+1;
+        return;
+    }
+
+    if (text_len - pos < 2) //error
+    {
+        ret[0] = '?';
+        ret[1] = pos+1;
+        return;
+    }
+
+    if ((text[pos]&0xe0) == 0xc0)   // 110xxxxx: two-byte sequence
+    {
+        if ((text[pos+1]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos+1;
+            return;
+        }
+
+        dummy = ((text[pos]&0x1f)<<6) | (text[pos+1]&0x3f);
+        if (dummy < 0x80) //error: value fits in a shorter form
+        {
+            ret[0] = '?';
+            ret[1] = pos+1;
+            return;
+        }
+        ret[0] = dummy;
+        ret[1] = pos+2;
+        return;
+    }
+    
+    if (text_len - pos < 3) //error
+    {
+        ret[0] = '?';
+        ret[1] = pos + 1;
+        return;
+    }
+
+    if ((text[pos]&0xf0) == 0xe0)   // 1110xxxx: three-byte sequence
+    {
+        if ((text[pos+1]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+        if ((text[pos+2]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+
+        dummy = ((text[pos]&0x0f) << 12) | ((text[pos+1]&0x3f) << 6) |
+            (text[pos+2]&0x3f);
+        if (dummy < 0x800) //error: value fits in a shorter form
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+        ret[0] = dummy;
+        ret[1] = pos + 3;
+        return;
+    }
+
+    if (text_len - pos < 4)
+    {
+        ret[0] = '?';
+        ret[1] = pos + 1;
+        return;
+    }
+
+    if ((text[pos]&0xf8) == 0xf0)   // 11110xxx: four-byte sequence
+    {
+        if ((text[pos+1]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+        if ((text[pos+2]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+        if ((text[pos+3]&0xc0) != 0x80) //error
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+
+        dummy = ((text[pos]&0x07) << 18) | ((text[pos+1]&0x3f) << 12) |
+            ((text[pos+2]&0x3f) << 6) | (text[pos+3]&0x3f);
+        if (dummy < 0x10000) //error: value fits in a shorter form
+        {
+            ret[0] = '?';
+            ret[1] = pos + 1;
+            return;
+        }
+        ret[0] = dummy;
+        ret[1] = pos + 4;
+        return;
+    }
+        
+    // lead byte is 10xxxxxx or 11111xxx: not the start of a sequence
+    ret[0] = '?';
+    ret[1] = pos + 1;
+}
+
+
+static PyObject * decode_one(PyObject *self, PyObject *args)
+{   // Python entry point: decode the UTF-8 sequence that starts at pos
+    PyObject *py_text;
+    Py_ssize_t pos, text_len;
+    char *text;
+    Py_ssize_t ret[2];
+    
+    if (!PyArg_ParseTuple(args, "On", &py_text, &pos))
+        return NULL;
+
+    // non-bytes input: -1 is returned with TypeError set; text is not usable
+    if (PyBytes_AsStringAndSize(py_text, &text, &text_len) == -1)
+        return NULL;
+    Py_DecodeOne((unsigned char *)text, text_len, pos, ret);
+    return Py_BuildValue("(n, n)", ret[0], ret[1]);
+}
+
+                                     
+
+//======================================================================
+static char decode_one_right_doc[] =
+"decode_one_right(string text, int pos) -> (int ord, int nextpos)\n\n\
+Return (ordinal at pos, next position) for UTF-8 encoded text.\n\
+pos is assumed to be on the trailing byte of a utf-8 sequence.\n\
+text -- text string \n\
+pos -- position in text";
+
+
+static void Py_DecodeOneRight(const unsigned char *text, Py_ssize_t text_len,
+                             Py_ssize_t pos, Py_ssize_t *ret)
+{
+    Py_ssize_t subret[2], start = pos;
+    // Walk left from pos to the nearest non-continuation byte and decode it.
+    while (pos >= 0)
+    {
+        if ((text[pos]&0xc0) != 0x80)
+        {
+            Py_DecodeOne(text, text_len, pos, subret);
+            ret[0] = subret[0];
+            ret[1] = pos-1;     // offset just left of the decoded sequence
+            return;
+        }
+        pos-=1;
+        // four continuation bytes in a row cannot end a UTF-8 sequence
+        if (pos == start-4) //error
+        {
+            ret[0] = '?';
+            ret[1] = start - 1; // consume one byte, as Py_DecodeOne does on error
+            return;
+        }
+    }
+}   // leaving the loop with pos < 0 keeps whatever the caller stored in ret
+
+
+static PyObject * decode_one_right(PyObject *self, PyObject *args)
+{
+    // Python entry point: decode the UTF-8 sequence that ends at pos.
+    PyObject *py_text;
+    Py_ssize_t pos, text_len;
+    char *text;
+    Py_ssize_t ret[2] = {'?',0};    // kept if Py_DecodeOneRight stores nothing
+    
+    if (!PyArg_ParseTuple(args, "On", &py_text, &pos))
+        return NULL;
+    // non-bytes input: -1 is returned with TypeError set; text is not usable
+    if (PyBytes_AsStringAndSize(py_text, &text, &text_len) == -1)
+        return NULL;
+    Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret);
+    // ret[] holds Py_ssize_t values, so the format units must be "n", not "i"
+    return Py_BuildValue("(n, n)", ret[0], ret[1]);
+}
+
+
+//======================================================================
+static char within_double_byte_doc[] =  // docstring registered in Str_UtilMethods
+"within_double_byte(string text, int line_start, int pos) -> int withindb\n\n\
+Return whether pos is within a double-byte encoded character.\n\n\
+str -- string in question\n\
+line_start -- offset of beginning of line (< pos)\n\
+pos -- offset in question\n\n\
+Return values:\n\
+0 -- not within dbe char, or double_byte_encoding == False\n\
+1 -- pos is on the 1st half of a dbe char\n\
+2 -- pos is on the 2nd half of a dbe char";
+
+
+static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start,
+                               Py_ssize_t pos)
+{   // classify str[pos]: 0 = not in a double-byte char, 1 = 1st half, 2 = 2nd half
+    Py_ssize_t i;
+
+    if ((str[pos] >= 0x40) && (str[pos] < 0x7f))
+    {
+        //might be second half of big5, uhc or gbk encoding
+        if (pos == line_start)  return 0;   // nothing before pos on this line
+        // 2nd half only when the preceding byte is itself a 1st half
+        if (str[pos-1] >= 0x81)
+        {
+            if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1)  return 2;
+            else return 0;
+        }
+    }
+
+    if (str[pos] < 0x80)  return 0;
+    // scan left for the nearest byte below 0x80 (or stop before line_start)
+    for (i=pos-1; i>=line_start; i--)
+        if (str[i] < 0x80)  break;
+    
+    if ((pos-i) & 1)  return 1;     // odd distance from that point: 1st half
+    else  return 2;
+}
+
+
+static PyObject * within_double_byte(PyObject *self, PyObject *args)
+{
+    // Python entry point: thin wrapper around Py_WithinDoubleByte
+    const unsigned char *str;
+    Py_ssize_t line_start, pos, where;
+
+    if (PyArg_ParseTuple(args, "snn", &str, &line_start, &pos) == 0)
+        return NULL;    // argument error already raised
+
+    where = Py_WithinDoubleByte(str, line_start, pos);
+    return Py_BuildValue("n", where);
+}
+
+
+//======================================================================
+char is_wide_char_doc[] =
+"is_wide_char(string/unicode text, int offs) -> bool iswide\n\n\
+Test if the character at offs within text is wide.\n\n\
+text -- string or unicode text\n\
+offs -- offset";
+
+static int Py_IsWideChar(PyObject *text, Py_ssize_t offs)
+{
+    const unsigned char *str;
+    Py_UNICODE *ustr;
+    Py_ssize_t ret[2], str_len;
+    // Returns 1 if the character at offs is wide, 0 if not, -1 with TypeError set.
+    if (PyUnicode_Check(text))  //text_py is unicode string
+    {
+        ustr = PyUnicode_AS_UNICODE(text);
+        return (Py_GetWidth((long int)ustr[offs]) == 2);
+    }
+
+    // bytes is the only other accepted type; checked without creating an object
+    if (!PyBytes_Check(text)) {
+        PyErr_SetString(PyExc_TypeError,
+            "is_wide_char: Argument \"text\" is not a string.");
+        return -1;
+    }
+    
+    str = (const unsigned char *)PyBytes_AsString(text);
+    str_len = PyBytes_Size(text);
+
+    if (byte_encoding == ENC_UTF8)
+    {
+        Py_DecodeOne(str, str_len, offs, ret);
+        return (Py_GetWidth(ret[0]) == 2);
+    }
+
+    if (byte_encoding == ENC_WIDE)
+        return (Py_WithinDoubleByte(str, offs, offs) == 1);
+
+    return 0;
+}
+
+
+static PyObject * is_wide_char(PyObject *self, PyObject *args)
+{
+    // Python entry point: wraps Py_IsWideChar and returns a bool
+    PyObject *text;
+    Py_ssize_t offs;
+    int ret;
+    
+    if (!PyArg_ParseTuple(args, "On", &text, &offs))
+        return NULL;
+
+    ret = Py_IsWideChar(text, offs);
+    if ( ret == -1) // error
+        return NULL;
+    // to_bool() already yields an owned reference; return it as-is
+    return to_bool(ret);
+}
+
+
+//======================================================================
+char move_prev_char_doc[] =
+"move_prev_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\
+Return the position of the character before end_offs.\n\n\
+text -- string or unicode text\n\
+start_offs -- start offset\n\
+end_offs -- end offset";
+
+
+static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs,
+                           Py_ssize_t end_offs)
+{   // returns the offset of the character that ends right before end_offs
+    Py_ssize_t position;
+    unsigned char *str;
+    
+    if (PyUnicode_Check(text))  //text_py is unicode string
+        return end_offs-1;
+    else
+        str = (unsigned char *)PyBytes_AsString(text);
+    
+    if (byte_encoding == ENC_UTF8) //encoding is utf8
+    {
+        position = end_offs - 1;    // then back up over continuation bytes,
+        while ((position > start_offs) && ((str[position]&0xc0) == 0x80))
+            position -=1;           // but never to the left of start_offs
+        return position;
+    }
+    else if ((byte_encoding == ENC_WIDE) &&
+             (Py_WithinDoubleByte(str, start_offs, end_offs-1) == 2))
+        return end_offs-2;
+    else
+        return end_offs-1;
+}
+
+
+static PyObject * move_prev_char(PyObject *self, PyObject *args)
+{
+    // Python entry point: thin wrapper around Py_MovePrevChar
+    PyObject *text;
+    Py_ssize_t start_offs, end_offs;
+
+    if (PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs) == 0)
+    {
+        return NULL;    // argument error already raised
+    }
+    return Py_BuildValue("n", Py_MovePrevChar(text, start_offs, end_offs));
+}
+
+
+//======================================================================
+char move_next_char_doc[] =
+"move_next_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\
+Return the position of the character after start_offs.\n\n\
+text -- string or unicode text\n\
+start_offs -- start offset\n\
+end_offs -- end offset";
+
+
+static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs,
+                           Py_ssize_t end_offs)
+{
+    // Return the offset of the character that follows the one at start_offs.
+    const unsigned char *bytes;
+    Py_ssize_t next = start_offs + 1;
+
+    if (PyUnicode_Check(text))
+        return next;            // unicode text: always advance by one
+    bytes = (const unsigned char *)PyBytes_AsString(text);
+    switch (byte_encoding)
+    {
+    case ENC_UTF8:
+        // step over continuation bytes (10xxxxxx), stopping at end_offs
+        for (; next < end_offs && (bytes[next]&0xc0) == 0x80; next++)
+            ;
+        return next;
+    case ENC_WIDE:
+        if (Py_WithinDoubleByte(bytes, start_offs, start_offs) == 1)
+            return start_offs+2;    // start_offs is a 1st half: skip both bytes
+        /* fall through */
+    default:
+        return start_offs+1;
+    }
+}
+
+
+static PyObject * move_next_char(PyObject *self, PyObject *args)
+{
+    // Python entry point: thin wrapper around Py_MoveNextChar
+    PyObject *text;
+    Py_ssize_t start_offs, end_offs, next_offs;
+
+    if (PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs) == 0)
+        return NULL;    // argument error already raised
+
+    next_offs = Py_MoveNextChar(text, start_offs, end_offs);
+    return Py_BuildValue("n", next_offs);
+}
+
+
+//======================================================================
+char calc_width_doc[] =
+"calc_width(string/unicode text, int start_off, int end_offs) -> int width\n\n\
+Return the screen column width of text between start_offs and end_offs.\n\n\
+text -- string or unicode text\n\
+start_offs -- start offset\n\
+end_offs -- end offset";
+
+
+static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs,
+                        Py_ssize_t end_offs)
+{
+    // Sum the screen columns taken by text between start_offs and end_offs.
+    // Returns -1 with TypeError set when text is neither unicode nor bytes.
+    const unsigned char *bytes;
+    const Py_UNICODE *chars;
+    Py_ssize_t offs, decoded[2], bytes_len;
+    int columns = 0;
+
+    if (PyUnicode_Check(text))
+    {
+        // unicode: each element is handed to Py_GetWidth as-is
+        chars = PyUnicode_AS_UNICODE(text);
+
+        for (offs = start_offs; offs < end_offs; offs++)
+            columns += Py_GetWidth(chars[offs]);
+        return columns;
+    }
+
+    if (!PyBytes_Check(text))
+    {
+        PyErr_SetString(PyExc_TypeError, "Neither unicode nor string.");
+        return -1;
+    }
+
+    // "wide" and "narrow": the width is simply the number of bytes
+    // in the range
+    if (byte_encoding != ENC_UTF8)
+        return end_offs - start_offs;
+
+    bytes = (const unsigned char *)PyBytes_AsString(text);
+    bytes_len = PyBytes_Size(text);
+
+    // UTF-8: decode one sequence at a time; Py_DecodeOne stores the
+    // offset of the following sequence in decoded[1]
+    for (offs = start_offs; offs < end_offs; offs = decoded[1])
+    {
+        Py_DecodeOne(bytes, bytes_len, offs, decoded);
+        columns += Py_GetWidth(decoded[0]);
+    }
+
+    return columns;
+}
+
+
+static PyObject * calc_width(PyObject *self, PyObject *args)
+{
+    // Python entry point: wrapper around Py_CalcWidth
+    PyObject *text;
+    Py_ssize_t start_offs, end_offs;    // same type as Py_CalcWidth's parameters
+    Py_ssize_t ret;
+
+    if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs))
+        return NULL; 
+    ret = Py_CalcWidth(text, start_offs, end_offs);
+    if (ret==-1) //an error occurred
+        return NULL;
+            
+    return Py_BuildValue("n", ret);
+}
+
+
+//======================================================================
+char calc_text_pos_doc[] =
+"calc_text_pos(string/unicode text, int start_offs, int end_offs, int pref_col)\n\
+-> (int pos, int actual_col)\n\n\
+Calculate the closest position to the screen column pref_col in text\n\
+where start_offs is the offset into text assumed to be screen column 0\n\
+and end_offs is the end of the range to search.\n\n\
+Returns (position, actual_col).\n\n\
+text -- string or unicode text\n\
+start_offs -- start offset\n\
+end_offs -- end offset\n\
+pref_col -- preferred column";
+
+
+static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs,
+                          Py_ssize_t end_offs, int pref_col, Py_ssize_t *ret)
+{   // stores ret[0] = offset, ret[1] = column; returns 0, or -1 with TypeError set
+    unsigned char * str;
+    Py_ssize_t i, dummy[2], str_len;
+    int screencols, width;
+    Py_UNICODE *ustr;
+
+    if (PyUnicode_Check(text))  //text_py is unicode string
+    {
+        ustr = PyUnicode_AS_UNICODE(text);
+        screencols = 0;
+ 
+        for(i=start_offs; i<end_offs; i++)
+        {
+            width = Py_GetWidth(ustr[i]);
+            // this character would go past pref_col: stop in front of it
+            if (width+screencols > pref_col)
+            {
+                ret[0] = i;
+                ret[1] = screencols;
+                return 0;
+            }
+
+            screencols += width;
+        }
+        // reached end_offs without exceeding pref_col
+        ret[0] = i;
+        ret[1] = screencols;
+        return 0;
+    }
+
+    if (!PyBytes_Check(text))
+    {
+        PyErr_SetString(PyExc_TypeError, "Neither unicode nor string.");
+        return -1;
+    }
+
+    str = (unsigned char *)PyBytes_AsString(text);
+    str_len = PyBytes_Size(text);
+    
+    if (byte_encoding == ENC_UTF8)
+    {
+        i = start_offs;
+        screencols = 0;
+
+        while (i<end_offs)
+        {
+            Py_DecodeOne(str, str_len, i, dummy);   // dummy = (ordinal, next offset)
+            width = Py_GetWidth(dummy[0]);
+
+            if (width+screencols > pref_col)
+            {
+                ret[0] = i;
+                ret[1] = screencols;
+                return 0;
+            }
+
+            i = dummy[1];
+            screencols += width;
+        }
+        // reached end_offs without exceeding pref_col
+        ret[0] = i;
+        ret[1] = screencols;
+        return 0;
+    }
+
+    // "wide" and "narrow"
+    i = start_offs + pref_col;      // column count is used directly as a byte offset
+
+    if (i>= end_offs)
+    {
+        ret[0] = end_offs;
+        ret[1] = end_offs - start_offs;
+        return 0;
+    }
+
+    if (byte_encoding == ENC_WIDE)
+        if (Py_WithinDoubleByte(str, start_offs, i)==2)
+            i -= 1;                 // step back onto the 1st half of the char
+
+    ret[0] = i;
+    ret[1] = i - start_offs;
+    return 0;
+}
+
+
+static PyObject * calc_text_pos(PyObject *self, PyObject *args)
+{
+    // Python entry point: returns the (position, actual_col) pair that
+    // Py_CalcTextPos stores in result[]
+    PyObject *text;
+    Py_ssize_t start_offs, end_offs, result[2];
+    int pref_col;
+
+    if (PyArg_ParseTuple(args, "Onni", &text, &start_offs, &end_offs,
+                         &pref_col) == 0)
+        return NULL;
+
+    if (Py_CalcTextPos(text, start_offs, end_offs, pref_col, result) == -1)
+        return NULL;    // Py_CalcTextPos has set the exception
+    return Py_BuildValue("(nn)", result[0], result[1]);
+}
+
+
+//======================================================================
+
+static PyMethodDef Str_UtilMethods[] = {    // {Python name, C function, flags, docstring}
+    {"get_byte_encoding", get_byte_encoding, METH_VARARGS,
+     get_byte_encoding_doc},
+    {"set_byte_encoding", set_byte_encoding, METH_VARARGS,
+     set_byte_encoding_doc},
+    {"get_width", get_width, METH_VARARGS, get_width_doc},
+    {"decode_one", decode_one, METH_VARARGS, decode_one_doc},
+    {"decode_one_right", decode_one_right, METH_VARARGS, decode_one_right_doc},
+    {"within_double_byte", within_double_byte, METH_VARARGS,
+     within_double_byte_doc},
+    {"is_wide_char", is_wide_char, METH_VARARGS, is_wide_char_doc},
+    {"move_prev_char", move_prev_char, METH_VARARGS, move_prev_char_doc},
+    {"move_next_char", move_next_char, METH_VARARGS, move_next_char_doc},
+    {"calc_width", calc_width, METH_VARARGS, calc_width_doc},
+    {"calc_text_pos", calc_text_pos, METH_VARARGS, calc_text_pos_doc},
+    {NULL, NULL, 0, NULL}        // Sentinel 
+};
+
+static struct PyModuleDef Str_UtilModule = {    // module definition used by PyInit_str_util
+    PyModuleDef_HEAD_INIT,
+    "str_util",         // m_name
+    NULL,               // m_doc: no module docstring
+    -1,                 // m_size
+    Str_UtilMethods     // m_methods
+};
+
+PyMODINIT_FUNC PyInit_str_util(void)
+{   // module init function: creates the module described by Str_UtilModule
+    return PyModule_Create(&Str_UtilModule);
+}
+
+
+/*
+int main(int argc, char *argv[])
+{
+    //Pass argv[0] to the Python interpreter:
+    Py_SetProgramName(argv[0]);
+
+    //Initialize the Python interpreter. 
+    Py_Initialize();
+
+    //Add a static module:
+    initstr_util();
+
+    return 0;
+}
+*/
author	Ian Ward <ian@excess.org>	2011-01-11 13:43:14 -0500
committer	Ian Ward <ian@excess.org>	2011-01-11 13:43:14 -0500
commit	c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb (patch)
tree	392992a2a91c4d738803f0071c54912c3d4d55f6 /source
parent	16ad617f3aeb9fc98719f40a2e3a9dd43f4e3c4a (diff)
download	urwid-c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb.tar.gz