diff options
| author | Ian Ward <ian@excess.org> | 2011-01-11 13:43:14 -0500 |
|---|---|---|
| committer | Ian Ward <ian@excess.org> | 2011-01-11 13:43:14 -0500 |
| commit | c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb (patch) | |
| tree | 392992a2a91c4d738803f0071c54912c3d4d55f6 /source | |
| parent | 16ad617f3aeb9fc98719f40a2e3a9dd43f4e3c4a (diff) | |
| download | urwid-c0fed218232315d4dc8a3500ccaf2a2cbc7a99eb.tar.gz | |
maintain separate versions of C extension for Python2/3
--HG--
branch : python3
rename : source/str_util.c => source/str_util_python3.c
Diffstat (limited to 'source')
| -rw-r--r-- | source/str_util.c | 133 | ||||
| -rw-r--r-- | source/str_util_python3.c | 833 |
2 files changed, 894 insertions, 72 deletions
diff --git a/source/str_util.c b/source/str_util.c index 76dc1ec..90b665f 100644 --- a/source/str_util.c +++ b/source/str_util.c @@ -176,8 +176,8 @@ text -- string text\n\ pos -- position in text"; -static void Py_DecodeOne(const unsigned char *text, Py_ssize_t text_len, - Py_ssize_t pos, Py_ssize_t *ret) +static void Py_DecodeOne(const unsigned char *text, int text_len, int pos, + int *ret) { int dummy; @@ -309,17 +309,17 @@ static PyObject * decode_one(PyObject *self, PyObject *args) { PyObject *py_text; - Py_ssize_t pos, text_len; + int pos, text_len; char *text; - Py_ssize_t ret[2]; + int ret[2]; - if (!PyArg_ParseTuple(args, "On", &py_text, &pos)) + if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos)) return NULL; - PyBytes_AsStringAndSize(py_text, &text, &text_len); + PyString_AsStringAndSize(py_text, &text, &text_len); Py_DecodeOne((unsigned char *)text, text_len, pos, ret); - return Py_BuildValue("(n, n)", ret[0], ret[1]); + return Py_BuildValue("(i, i)", ret[0], ret[1]); } @@ -333,10 +333,10 @@ text -- text string \n\ pos -- position in text"; -static void Py_DecodeOneRight(const unsigned char *text, Py_ssize_t text_len, - Py_ssize_t pos, Py_ssize_t *ret) +static void Py_DecodeOneRight(const unsigned char *text, int text_len, int pos, + int *ret) { - Py_ssize_t subret[2]; + int subret[2]; while (pos >= 0) { @@ -364,14 +364,14 @@ static PyObject * decode_one_right(PyObject *self, PyObject *args) PyObject *py_text; - Py_ssize_t pos, text_len; + int pos, text_len; char *text; - Py_ssize_t ret[2] = {'?',0}; + int ret[2] = {'?',0}; - if (!PyArg_ParseTuple(args, "On", &py_text, &pos)) + if (!PyArg_ParseTuple(args, "Oi", &py_text, &pos)) return NULL; - PyBytes_AsStringAndSize(py_text, &text, &text_len); + PyString_AsStringAndSize(py_text, &text, &text_len); Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret); return Py_BuildValue("(i, i)", ret[0], ret[1]); @@ -391,10 +391,10 @@ Return values:\n\ 2 -- pos is on the 2nd half of a dbe char"; -static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start, - Py_ssize_t pos) +static int Py_WithinDoubleByte(const unsigned char *str, int line_start, + int pos) { - Py_ssize_t i; + int i; if ((str[pos] >= 0x40) && (str[pos] < 0x7f)) { @@ -421,14 +421,14 @@ static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start, static PyObject * within_double_byte(PyObject *self, PyObject *args) { const unsigned char *str; - Py_ssize_t line_start, pos; - Py_ssize_t ret; + int line_start, pos; + int ret; - if (!PyArg_ParseTuple(args, "snn", &str, &line_start, &pos)) + if (!PyArg_ParseTuple(args, "sii", &str, &line_start, &pos)) return NULL; ret = Py_WithinDoubleByte(str, line_start, pos); - return Py_BuildValue("n", ret); + return Py_BuildValue("i", ret); } @@ -439,11 +439,11 @@ Test if the character at offs within text is wide.\n\n\ text -- string or unicode text\n\ offs -- offset"; -static int Py_IsWideChar(PyObject *text, Py_ssize_t offs) +static int Py_IsWideChar(PyObject *text, int offs) { const unsigned char *str; Py_UNICODE *ustr; - Py_ssize_t ret[2], str_len; + int ret[2], str_len; if (PyUnicode_Check(text)) //text_py is unicode string { @@ -458,8 +458,8 @@ static int Py_IsWideChar(PyObject *text, Py_ssize_t offs) return -1; } - str = (const unsigned char *)PyBytes_AsString(text); - str_len = (int) PyBytes_Size(text); + str = (const unsigned char *)PyString_AsString(text); + str_len = (int) PyString_Size(text); if (byte_encoding == ENC_UTF8) { @@ -477,10 +477,10 @@ static int Py_IsWideChar(PyObject *text, Py_ssize_t offs) static PyObject * is_wide_char(PyObject *self, PyObject *args) { PyObject *text; - Py_ssize_t offs; + int offs; int ret; - if (!PyArg_ParseTuple(args, "On", &text, &offs)) + if (!PyArg_ParseTuple(args, "Oi", &text, &offs)) return NULL; ret = Py_IsWideChar(text, offs); @@ -501,16 +501,16 @@ start_offs -- start offset\n\ end_offs -- end offset"; -static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs, - Py_ssize_t end_offs) +static int Py_MovePrevChar(PyObject *text, int start_offs, + int end_offs) { - Py_ssize_t position; + int position; unsigned char *str; if (PyUnicode_Check(text)) //text_py is unicode string return end_offs-1; else - str = (unsigned char *)PyBytes_AsString(text); + str = (unsigned char *)PyString_AsString(text); if (byte_encoding == ENC_UTF8) //encoding is utf8 { @@ -530,14 +530,14 @@ static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs, static PyObject * move_prev_char(PyObject *self, PyObject *args) { PyObject *text; - Py_ssize_t start_offs, end_offs; - Py_ssize_t ret; + int start_offs, end_offs; + int ret; - if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs)) + if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs)) return NULL; ret = Py_MovePrevChar(text, start_offs, end_offs); - return Py_BuildValue("n", ret); + return Py_BuildValue("i", ret); } @@ -550,16 +550,16 @@ start_offs -- start offset\n\ end_offs -- end offset"; -static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs, - Py_ssize_t end_offs) +static int Py_MoveNextChar(PyObject *text, int start_offs, + int end_offs) { - Py_ssize_t position; + int position; unsigned char * str; if (PyUnicode_Check(text)) //text_py is unicode string return start_offs+1; else - str = (unsigned char *)PyBytes_AsString(text); + str = (unsigned char *)PyString_AsString(text); if (byte_encoding == ENC_UTF8) //encoding is utf8 { @@ -580,14 +580,14 @@ static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs, static PyObject * move_next_char(PyObject *self, PyObject *args) { PyObject *text; - Py_ssize_t start_offs, end_offs; - Py_ssize_t ret; + int start_offs, end_offs; + int ret; - if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs)) + if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs)) return NULL; ret = Py_MoveNextChar(text, start_offs, end_offs); - return Py_BuildValue("n", ret); + return Py_BuildValue("i", ret); } @@ -600,12 +600,10 @@ start_offs -- start offset\n\ end_offs -- end offset"; -static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs, - Py_ssize_t end_offs) +static int Py_CalcWidth(PyObject *text, int start_offs, int end_offs) { unsigned char * str; - Py_ssize_t i, ret[2], str_len; - int screencols; + int i, screencols, ret[2], str_len; Py_UNICODE *ustr; if (PyUnicode_Check(text)) //text_py is unicode string @@ -619,14 +617,14 @@ static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs, return screencols; } - if (!PyBytes_Check(text)) + if (!PyString_Check(text)) { PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); return -1; } - str = (unsigned char *)PyBytes_AsString(text); - str_len = PyBytes_Size(text); + str = (unsigned char *)PyString_AsString(text); + str_len = (int) PyString_Size(text); if (byte_encoding == ENC_UTF8) { @@ -677,12 +675,11 @@ end_offs -- end offset\n\ pref_col -- preferred column"; -static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs, - Py_ssize_t end_offs, int pref_col, Py_ssize_t *ret) +static int Py_CalcTextPos(PyObject *text, int start_offs, int end_offs, + int pref_col, int *ret) { unsigned char * str; - Py_ssize_t i, dummy[2], str_len; - int screencols, width; + int i, screencols, dummy[2], str_len, width; Py_UNICODE *ustr; if (PyUnicode_Check(text)) //text_py is unicode string @@ -709,14 +706,14 @@ static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs, return 0; } - if (!PyBytes_Check(text)) + if (!PyString_Check(text)) { PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); return -1; } - str = (unsigned char *)PyBytes_AsString(text); - str_len = PyBytes_Size(text); + str = (unsigned char *)PyString_AsString(text); + str_len = (int) PyString_Size(text); if (byte_encoding == ENC_UTF8) { @@ -767,10 +764,10 @@ static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs, static PyObject * calc_text_pos(PyObject *self, PyObject *args) { PyObject *text; - Py_ssize_t start_offs, end_offs, ret[2]; - int pref_col, err; + int start_offs, end_offs, pref_col; + int ret[2], err; - if (!PyArg_ParseTuple(args, "Onni", &text, &start_offs, &end_offs, + if (!PyArg_ParseTuple(args, "Oiii", &text, &start_offs, &end_offs, &pref_col)) return NULL; @@ -778,7 +775,7 @@ static PyObject * calc_text_pos(PyObject *self, PyObject *args) if (err==-1) //an error occured return NULL; - return Py_BuildValue("(nn)", ret[0], ret[1]); + return Py_BuildValue("(ii)", ret[0], ret[1]); } @@ -802,21 +799,14 @@ static PyMethodDef Str_UtilMethods[] = { {NULL, NULL, 0, NULL} // Sentinel }; -static struct PyModuleDef Str_UtilModule = { - PyModuleDef_HEAD_INIT, - "str_util", - NULL, - -1, - Str_UtilMethods -}; -PyMODINIT_FUNC PyInit_str_util(void) +PyMODINIT_FUNC initstr_util(void) { - return PyModule_Create(&Str_UtilModule); + Py_InitModule("str_util", Str_UtilMethods); } -/* + int main(int argc, char *argv[]) { //Pass argv[0] to the Python interpreter: @@ -830,4 +820,3 @@ int main(int argc, char *argv[]) return 0; } -*/ diff --git a/source/str_util_python3.c b/source/str_util_python3.c new file mode 100644 index 0000000..76dc1ec --- /dev/null +++ b/source/str_util_python3.c @@ -0,0 +1,833 @@ +/* Urwid unicode character processing tables + + Copyright (C) 2006 Rebecca Breu. + This file contains rewritten code of utable.py by Ian Ward. + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + Urwid web site: http://excess.org/urwid/ +*/ + + +#include <Python.h> + +#define ENC_UTF8 1 +#define ENC_WIDE 2 +#define ENC_NARROW 3 + + +static int widths_len = 2*38; +static const long int widths[] = { + 126, 1, + 159, 0, + 687, 1, + 710, 0, + 711, 1, + 727, 0, + 733, 1, + 879, 0, + 1154, 1, + 1161, 0, + 4347, 1, + 4447, 2, + 7467, 1, + 7521, 0, + 8369, 1, + 8426, 0, + 9000, 1, + 9002, 2, + 11021, 1, + 12350, 2, + 12351, 1, + 12438, 2, + 12442, 0, + 19893, 2, + 19967, 1, + 55203, 2, + 63743, 1, + 64106, 2, + 65039, 1, + 65059, 0, + 65131, 2, + 65279, 1, + 65376, 2, + 65500, 1, + 65510, 2, + 120831, 1, + 262141, 2, + 1114109, 1 +}; + + +static short byte_encoding = ENC_UTF8; + + +static PyObject * to_bool(int val) +{ + if (val) Py_RETURN_TRUE; + else Py_RETURN_FALSE; +} + + +//====================================================================== +static char get_byte_encoding_doc[] = +"get_byte_encoding() -> string encoding\n\n\ +Get byte encoding ('utf8', 'wide', or 'narrow')."; + +static PyObject * get_byte_encoding(PyObject *self, PyObject *args) +{ + if (!PyArg_ParseTuple(args, "")) + return NULL; + + if (byte_encoding == ENC_UTF8) + return Py_BuildValue("s", "utf8"); + if (byte_encoding == ENC_WIDE) + return Py_BuildValue("s", "wide"); + if (byte_encoding == ENC_NARROW) + return Py_BuildValue("s", "narrow"); + return Py_None; // should never happen +} + + +//====================================================================== +static char set_byte_encoding_doc[] = +"set_byte_encoding(string encoding) -> None\n\n\ +Set byte encoding. \n\n\ +encoding -- one of 'utf8', 'wide', 'narrow'"; + +static PyObject * set_byte_encoding(PyObject *self, PyObject *args) +{ + char * enc; + + if (!PyArg_ParseTuple(args, "s", &enc)) + return NULL; + + if (strcmp(enc, "utf8") == 0) + byte_encoding = ENC_UTF8; + else if (strcmp(enc, "wide") == 0) + byte_encoding = ENC_WIDE; + else if (strcmp(enc, "narrow") == 0) + byte_encoding = ENC_NARROW; + else + { + // got wrong encoding + PyErr_SetString(PyExc_ValueError, "Unknown encoding."); + return NULL; + } + + return Py_None; +} + + +//====================================================================== +static char get_width_doc[] = +"get_width(int ord) -> int width\n\n\ +Return the screen column width for unicode ordinal ord.\n\n\ +ord -- ordinal"; + + +static int Py_GetWidth(long int ord) +{ + int i; + + if ((ord == 0xe) || (ord == 0xf)) + return 0; + + for (i=0; i<widths_len; i+=2) + { + if (ord <= widths[i]) + return widths[i+1]; + } + + return 1; +} + + +static PyObject * get_width(PyObject *self, PyObject *args) +{ + long int ord; + int ret; + + if (!PyArg_ParseTuple(args, "l", &ord)) + return NULL; + + ret = Py_GetWidth(ord); + return Py_BuildValue("i", ret); +} + + +//====================================================================== +static char decode_one_doc[] = +"decode_one(string text, int pos) -> (int ord, int nextpos)\n\n\ +Return (ordinal at pos, next position) for UTF-8 encoded text.\n\n\ +text -- string text\n\ +pos -- position in text"; + + +static void Py_DecodeOne(const unsigned char *text, Py_ssize_t text_len, + Py_ssize_t pos, Py_ssize_t *ret) +{ + int dummy; + + if (!(text[pos]&0x80)) + { + ret[0] = text[pos]; + ret[1] = pos+1; + return; + } + + if (text_len - pos < 2) //error + { + ret[0] = '?'; + ret[1] = pos+1; + return; + } + + if ((text[pos]&0xe0) == 0xc0) + { + if ((text[pos+1]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos+1; + return; + } + + dummy = ((text[pos]&0x1f)<<6) | (text[pos+1]&0x3f); + if (dummy < 0x80) //error + { + ret[0] = '?'; + ret[1] = pos+1; + return; + } + + ret[0] = dummy; + ret[1] = pos+2; + return; + } + + if (text_len - pos < 3) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + if ((text[pos]&0xf0) == 0xe0) + { + if ((text[pos+1]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + if ((text[pos+2]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + dummy = ((text[pos]&0x0f) << 12) | ((text[pos+1]&0x3f) << 6) | + (text[pos+2]&0x3f); + if (dummy < 0x800) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + ret[0] = dummy; + ret[1] = pos + 3; + return; + } + + if (text_len - pos < 4) + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + if ((text[pos]&0xf8) == 0xf0) + { + if ((text[pos+1]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + if ((text[pos+2]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + if ((text[pos+3]&0xc0) != 0x80) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + dummy = ((text[pos]&0x07) << 18) | ((text[pos+1]&0x3f) << 12) | + ((text[pos+2]&0x3f) << 6) | (text[pos+3]&0x3f); + if (dummy < 0x10000) //error + { + ret[0] = '?'; + ret[1] = pos + 1; + return; + } + + ret[0] = dummy; + ret[1] = pos + 4; + return; + } + + ret[0] = '?'; + ret[1] = pos + 1; + return; + +} + + +static PyObject * decode_one(PyObject *self, PyObject *args) +{ + PyObject *py_text; + + Py_ssize_t pos, text_len; + char *text; + Py_ssize_t ret[2]; + + if (!PyArg_ParseTuple(args, "On", &py_text, &pos)) + return NULL; + + PyBytes_AsStringAndSize(py_text, &text, &text_len); + + Py_DecodeOne((unsigned char *)text, text_len, pos, ret); + return Py_BuildValue("(n, n)", ret[0], ret[1]); +} + + + +//====================================================================== +static char decode_one_right_doc[] = +"decode_one_right(string text, int pos) -> (int ord, int nextpos)\n\n\ +Return (ordinal at pos, next position) for UTF-8 encoded text.\n\ +pos is assumed to be on the trailing byte of a utf-8 sequence.\n\ +text -- text string \n\ +pos -- position in text"; + + +static void Py_DecodeOneRight(const unsigned char *text, Py_ssize_t text_len, + Py_ssize_t pos, Py_ssize_t *ret) +{ + Py_ssize_t subret[2]; + + while (pos >= 0) + { + if ((text[pos]&0xc0) != 0x80) + { + Py_DecodeOne(text, text_len, pos, subret); + ret[0] = subret[0]; + ret[1] = pos-1; + return; + } + pos-=1; + + if (pos == pos-4) //error + { + ret[0] = '?'; + ret[1] = pos - 1; + return; + } + } +} + + +static PyObject * decode_one_right(PyObject *self, PyObject *args) +{ + + PyObject *py_text; + + Py_ssize_t pos, text_len; + char *text; + Py_ssize_t ret[2] = {'?',0}; + + if (!PyArg_ParseTuple(args, "On", &py_text, &pos)) + return NULL; + + PyBytes_AsStringAndSize(py_text, &text, &text_len); + + Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret); + return Py_BuildValue("(i, i)", ret[0], ret[1]); +} + + +//====================================================================== +static char within_double_byte_doc[] = +"within_double_byte(strint text, int line_start, int pos) -> int withindb\n\n\ +Return whether pos is within a double-byte encoded character.\n\n\ +str -- string in question\n\ +line_start -- offset of beginning of line (< pos)\n\ +pos -- offset in question\n\n\ +Return values:\n\ +0 -- not within dbe char, or double_byte_encoding == False\n\ +1 -- pos is on the 1st half of a dbe char\n\ +2 -- pos is on the 2nd half of a dbe char"; + + +static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start, + Py_ssize_t pos) +{ + Py_ssize_t i; + + if ((str[pos] >= 0x40) && (str[pos] < 0x7f)) + { + //might be second half of big5, uhc or gbk encoding + if (pos == line_start) return 0; + + if (str[pos-1] >= 0x81) + { + if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1) return 2; + else return 0; + } + } + + if (str[pos] < 0x80) return 0; + + for (i=pos-1; i>=line_start; i--) + if (str[i] < 0x80) break; + + if ((pos-i) & 1) return 1; + else return 2; +} + + +static PyObject * within_double_byte(PyObject *self, PyObject *args) +{ + const unsigned char *str; + Py_ssize_t line_start, pos; + Py_ssize_t ret; + + if (!PyArg_ParseTuple(args, "snn", &str, &line_start, &pos)) + return NULL; + + ret = Py_WithinDoubleByte(str, line_start, pos); + return Py_BuildValue("n", ret); +} + + +//====================================================================== +char is_wide_char_doc[] = +"is_wide_char(string/unicode text, int offs) -> bool iswide\n\n\ +Test if the character at offs within text is wide.\n\n\ +text -- string or unicode text\n\ +offs -- offset"; + +static int Py_IsWideChar(PyObject *text, Py_ssize_t offs) +{ + const unsigned char *str; + Py_UNICODE *ustr; + Py_ssize_t ret[2], str_len; + + if (PyUnicode_Check(text)) //text_py is unicode string + { + ustr = PyUnicode_AS_UNICODE(text); + return (Py_GetWidth((long int)ustr[offs]) == 2); + } + + if ( text->ob_type != Py_BuildValue("s","")->ob_type ) { + + PyErr_SetString(PyExc_TypeError, + "is_wide_char: Argument \"text\" is not a string."); + return -1; + } + + str = (const unsigned char *)PyBytes_AsString(text); + str_len = (int) PyBytes_Size(text); + + if (byte_encoding == ENC_UTF8) + { + Py_DecodeOne(str, str_len, offs, ret); + return (Py_GetWidth(ret[0]) == 2); + } + + if (byte_encoding == ENC_WIDE) + return (Py_WithinDoubleByte(str, offs, offs) == 1); + + return 0; +} + + +static PyObject * is_wide_char(PyObject *self, PyObject *args) +{ + PyObject *text; + Py_ssize_t offs; + int ret; + + if (!PyArg_ParseTuple(args, "On", &text, &offs)) + return NULL; + + ret = Py_IsWideChar(text, offs); + + if ( ret == -1) // error + return NULL; + + return Py_BuildValue("O", to_bool(ret)); +} + + +//====================================================================== +char move_prev_char_doc[] = +"move_prev_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ +Return the position of the character before end_offs.\n\n\ +text -- string or unicode text\n\ +start_offs -- start offset\n\ +end_offs -- end offset"; + + +static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs, + Py_ssize_t end_offs) +{ + Py_ssize_t position; + unsigned char *str; + + if (PyUnicode_Check(text)) //text_py is unicode string + return end_offs-1; + else + str = (unsigned char *)PyBytes_AsString(text); + + if (byte_encoding == ENC_UTF8) //encoding is utf8 + { + position = end_offs - 1; + while ((str[position]&0xc0) == 0x80) + position -=1; + return position; + } + else if ((byte_encoding == ENC_WIDE) && + (Py_WithinDoubleByte(str, start_offs, end_offs-1) == 2)) + return end_offs-2; + else + return end_offs-1; +} + + +static PyObject * move_prev_char(PyObject *self, PyObject *args) +{ + PyObject *text; + Py_ssize_t start_offs, end_offs; + Py_ssize_t ret; + + if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs)) + return NULL; + + ret = Py_MovePrevChar(text, start_offs, end_offs); + return Py_BuildValue("n", ret); +} + + +//====================================================================== +char move_next_char_doc[] = +"move_next_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ +Return the position of the character after start_offs.\n\n\ +text -- string or unicode text\n\ +start_offs -- start offset\n\ +end_offs -- end offset"; + + +static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs, + Py_ssize_t end_offs) +{ + Py_ssize_t position; + unsigned char * str; + + if (PyUnicode_Check(text)) //text_py is unicode string + return start_offs+1; + else + str = (unsigned char *)PyBytes_AsString(text); + + if (byte_encoding == ENC_UTF8) //encoding is utf8 + { + position = start_offs + 1; + while ((position < end_offs) && ((str[position]&0xc0) == 0x80)) + position +=1; + + return position; + } + else if ((byte_encoding == ENC_WIDE) && + (Py_WithinDoubleByte(str, start_offs, start_offs) == 1)) + return start_offs+2; + else + return start_offs+1; +} + + +static PyObject * move_next_char(PyObject *self, PyObject *args) +{ + PyObject *text; + Py_ssize_t start_offs, end_offs; + Py_ssize_t ret; + + if (!PyArg_ParseTuple(args, "Onn", &text, &start_offs, &end_offs)) + return NULL; + + ret = Py_MoveNextChar(text, start_offs, end_offs); + return Py_BuildValue("n", ret); +} + + +//====================================================================== +char calc_width_doc[] = +"calc_width(string/unicode text, int start_off, int end_offs) -> int width\n\n\ +Return the screen column width of text between start_offs and end_offs.\n\n\ +text -- string or unicode text\n\ +start_offs -- start offset\n\ +end_offs -- end offset"; + + +static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs, + Py_ssize_t end_offs) +{ + unsigned char * str; + Py_ssize_t i, ret[2], str_len; + int screencols; + Py_UNICODE *ustr; + + if (PyUnicode_Check(text)) //text_py is unicode string + { + ustr = PyUnicode_AS_UNICODE(text); + screencols = 0; + + for(i=start_offs; i<end_offs; i++) + screencols += Py_GetWidth(ustr[i]); + + return screencols; + } + + if (!PyBytes_Check(text)) + { + PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); + return -1; + } + + str = (unsigned char *)PyBytes_AsString(text); + str_len = PyBytes_Size(text); + + if (byte_encoding == ENC_UTF8) + { + i = start_offs; + screencols = 0; + + while (i<end_offs) { + Py_DecodeOne(str, str_len, i, ret); + screencols += Py_GetWidth(ret[0]); + i = ret[1]; + } + + return screencols; + } + + return end_offs - start_offs; // "wide" and "narrow" +} + + +static PyObject * calc_width(PyObject *self, PyObject *args) +{ + PyObject *text; + int start_offs, end_offs; + int ret; + + if (!PyArg_ParseTuple(args, "Oii", &text, &start_offs, &end_offs)) + return NULL; + + ret = Py_CalcWidth(text, start_offs, end_offs); + if (ret==-1) //an error occured + return NULL; + + return Py_BuildValue("i", ret); +} + + +//====================================================================== +char calc_text_pos_doc[] = +"calc_text_pos(string/unicode text, int start_offs, int end_offs, int pref_col)\n\ +-> (int pos, int actual_col)\n\n\ +Calculate the closest position to the screen column pref_col in text\n\ +where start_offs is the offset into text assumed to be screen column 0\n\ +and end_offs is the end of the range to search.\n\n\ +Returns (position, actual_col).\n\n\ +text -- string or unicode text\n\ +start_offs -- start offset\n\ +end_offs -- end offset\n\ +pref_col -- preferred column"; + + +static int Py_CalcTextPos(PyObject *text, Py_ssize_t start_offs, + Py_ssize_t end_offs, int pref_col, Py_ssize_t *ret) +{ + unsigned char * str; + Py_ssize_t i, dummy[2], str_len; + int screencols, width; + Py_UNICODE *ustr; + + if (PyUnicode_Check(text)) //text_py is unicode string + { + ustr = PyUnicode_AS_UNICODE(text); + screencols = 0; + + for(i=start_offs; i<end_offs; i++) + { + width = Py_GetWidth(ustr[i]); + + if (width+screencols > pref_col) + { + ret[0] = i; + ret[1] = screencols; + return 0; + } + + screencols += width; + } + + ret[0] = i; + ret[1] = screencols; + return 0; + } + + if (!PyBytes_Check(text)) + { + PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); + return -1; + } + + str = (unsigned char *)PyBytes_AsString(text); + str_len = PyBytes_Size(text); + + if (byte_encoding == ENC_UTF8) + { + i = start_offs; + screencols = 0; + + while (i<end_offs) + { + Py_DecodeOne(str, str_len, i, dummy); + width = Py_GetWidth(dummy[0]); + + if (width+screencols > pref_col) + { + ret[0] = i; + ret[1] = screencols; + return 0; + } + + i = dummy[1]; + screencols += width; + } + + ret[0] = i; + ret[1] = screencols; + return 0; + } + + // "wide" and "narrow" + i = start_offs + pref_col; + + if (i>= end_offs) + { + ret[0] = end_offs; + ret[1] = end_offs - start_offs; + return 0; + } + + if (byte_encoding == ENC_WIDE) + if (Py_WithinDoubleByte(str, start_offs, i)==2) + i -= 1; + + ret[0] = i; + ret[1] = i - start_offs; + return 0; +} + + +static PyObject * calc_text_pos(PyObject *self, PyObject *args) +{ + PyObject *text; + Py_ssize_t start_offs, end_offs, ret[2]; + int pref_col, err; + + if (!PyArg_ParseTuple(args, "Onni", &text, &start_offs, &end_offs, + &pref_col)) + return NULL; + + err = Py_CalcTextPos(text, start_offs, end_offs, pref_col, ret); + if (err==-1) //an error occured + return NULL; + + return Py_BuildValue("(nn)", ret[0], ret[1]); +} + + +//====================================================================== + +static PyMethodDef Str_UtilMethods[] = { + {"get_byte_encoding", get_byte_encoding, METH_VARARGS, + get_byte_encoding_doc}, + {"set_byte_encoding", set_byte_encoding, METH_VARARGS, + set_byte_encoding_doc}, + {"get_width", get_width, METH_VARARGS, get_width_doc}, + {"decode_one", decode_one, METH_VARARGS, decode_one_doc}, + {"decode_one_right", decode_one_right, METH_VARARGS, decode_one_right_doc}, + {"within_double_byte", within_double_byte, METH_VARARGS, + within_double_byte_doc}, + {"is_wide_char", is_wide_char, METH_VARARGS, is_wide_char_doc}, + {"move_prev_char", move_prev_char, METH_VARARGS, move_prev_char_doc}, + {"move_next_char", move_next_char, METH_VARARGS, move_next_char_doc}, + {"calc_width", calc_width, METH_VARARGS, calc_width_doc}, + {"calc_text_pos", calc_text_pos, METH_VARARGS, calc_text_pos_doc}, + {NULL, NULL, 0, NULL} // Sentinel +}; + +static struct PyModuleDef Str_UtilModule = { + PyModuleDef_HEAD_INIT, + "str_util", + NULL, + -1, + Str_UtilMethods +}; + +PyMODINIT_FUNC PyInit_str_util(void) +{ + return PyModule_Create(&Str_UtilModule); +} + + +/* +int main(int argc, char *argv[]) +{ + //Pass argv[0] to the Python interpreter: + Py_SetProgramName(argv[0]); + + //Initialize the Python interpreter. + Py_Initialize(); + + //Add a static module: + initstr_util(); + + return 0; +} +*/ |
