/* Urwid unicode character processing tables Copyright (C) 2006 Rebecca Breu. This file contains rewritten code of utable.py by Ian Ward. This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Urwid web site: http://excess.org/urwid/ */ #define PY_SSIZE_T_CLEAN #include #define ENC_UTF8 1 #define ENC_WIDE 2 #define ENC_NARROW 3 #if PY_MAJOR_VERSION >= 3 #define PYTHON3 #endif #if PY_MAJOR_VERSION == 2 && PY_MINOR_VERSION < 5 #define Py_ssize_t int #define FMT_N "i" #else #define FMT_N "n" #endif static int widths_len = 2*38; static const long int widths[] = { 126, 1, 159, 0, 687, 1, 710, 0, 711, 1, 727, 0, 733, 1, 879, 0, 1154, 1, 1161, 0, 4347, 1, 4447, 2, 7467, 1, 7521, 0, 8369, 1, 8426, 0, 9000, 1, 9002, 2, 11021, 1, 12350, 2, 12351, 1, 12438, 2, 12442, 0, 19893, 2, 19967, 1, 55203, 2, 63743, 1, 64106, 2, 65039, 1, 65059, 0, 65131, 2, 65279, 1, 65376, 2, 65500, 1, 65510, 2, 120831, 1, 262141, 2, 1114109, 1 }; static short byte_encoding = ENC_UTF8; static PyObject * to_bool(int val) { if (val) Py_RETURN_TRUE; else Py_RETURN_FALSE; } //====================================================================== static char get_byte_encoding_doc[] = "get_byte_encoding() -> string encoding\n\n\ Get byte encoding ('utf8', 'wide', or 'narrow')."; static PyObject * get_byte_encoding(PyObject *self, PyObject *args) { if (!PyArg_ParseTuple(args, "")) return NULL; if (byte_encoding == ENC_UTF8) return Py_BuildValue("s", "utf8"); if (byte_encoding == ENC_WIDE) return Py_BuildValue("s", "wide"); if (byte_encoding == ENC_NARROW) return Py_BuildValue("s", "narrow"); Py_RETURN_NONE; // should never happen } //====================================================================== static char set_byte_encoding_doc[] = "set_byte_encoding(string encoding) -> None\n\n\ Set byte encoding. \n\n\ encoding -- one of 'utf8', 'wide', 'narrow'"; static PyObject * set_byte_encoding(PyObject *self, PyObject *args) { char * enc; if (!PyArg_ParseTuple(args, "s", &enc)) return NULL; if (strcmp(enc, "utf8") == 0) byte_encoding = ENC_UTF8; else if (strcmp(enc, "wide") == 0) byte_encoding = ENC_WIDE; else if (strcmp(enc, "narrow") == 0) byte_encoding = ENC_NARROW; else { // got wrong encoding PyErr_SetString(PyExc_ValueError, "Unknown encoding."); return NULL; } Py_RETURN_NONE; } //====================================================================== static char get_width_doc[] = "get_width(int ord) -> int width\n\n\ Return the screen column width for unicode ordinal ord.\n\n\ ord -- ordinal"; static int Py_GetWidth(long int ord) { int i; if ((ord == 0xe) || (ord == 0xf)) return 0; for (i=0; i= 0) { if ((text[pos]&0xc0) != 0x80) { Py_DecodeOne(text, text_len, pos, subret); ret[0] = subret[0]; ret[1] = pos-1; return; } pos-=1; if (pos == pos-4) //error { ret[0] = '?'; ret[1] = pos - 1; return; } } } static PyObject * decode_one_right(PyObject *self, PyObject *args) { PyObject *py_text; Py_ssize_t pos, text_len; char *text; Py_ssize_t ret[2] = {'?',0}; if (!PyArg_ParseTuple(args, "O" FMT_N, &py_text, &pos)) return NULL; #ifndef PYTHON3 PyString_AsStringAndSize(py_text, &text, &text_len); #else PyBytes_AsStringAndSize(py_text, &text, &text_len); #endif Py_DecodeOneRight((const unsigned char *)text, text_len, pos, ret); return Py_BuildValue("(" FMT_N ", " FMT_N ")", ret[0], ret[1]); } //====================================================================== static char within_double_byte_doc[] = "within_double_byte(strint text, int line_start, int pos) -> int withindb\n\n\ Return whether pos is within a double-byte encoded character.\n\n\ str -- string in question\n\ line_start -- offset of beginning of line (< pos)\n\ pos -- offset in question\n\n\ Return values:\n\ 0 -- not within dbe char, or double_byte_encoding == False\n\ 1 -- pos is on the 1st half of a dbe char\n\ 2 -- pos is on the 2nd half of a dbe char"; static int Py_WithinDoubleByte(const unsigned char *str, Py_ssize_t line_start, Py_ssize_t pos) { Py_ssize_t i; if ((str[pos] >= 0x40) && (str[pos] < 0x7f)) { //might be second half of big5, uhc or gbk encoding if (pos == line_start) return 0; if (str[pos-1] >= 0x81) { if ((Py_WithinDoubleByte(str, line_start, pos-1)) == 1) return 2; else return 0; } } if (str[pos] < 0x80) return 0; for (i=pos-1; i>=line_start; i--) if (str[i] < 0x80) break; if ((pos-i) & 1) return 1; else return 2; } static PyObject * within_double_byte(PyObject *self, PyObject *args) { const unsigned char *str; Py_ssize_t str_len, line_start, pos; Py_ssize_t ret; if (!PyArg_ParseTuple(args, "s#" FMT_N FMT_N, &str, &str_len, &line_start, &pos)) return NULL; if (line_start < 0 || line_start >= str_len) { PyErr_SetString(PyExc_IndexError, "is_wide_char: Argument \"line_start\" is outside of string."); return NULL; } if (pos < 0 || pos >= str_len) { PyErr_SetString(PyExc_IndexError, "is_wide_char: Argument \"pos\" is outside of string."); return NULL; } if (pos < line_start) { PyErr_SetString(PyExc_IndexError, "is_wide_char: Argument \"pos\" is before \"line_start\"."); return NULL; } ret = Py_WithinDoubleByte(str, line_start, pos); return Py_BuildValue(FMT_N, ret); } //====================================================================== char is_wide_char_doc[] = "is_wide_char(string/unicode text, int offs) -> bool iswide\n\n\ Test if the character at offs within text is wide.\n\n\ text -- string or unicode text\n\ offs -- offset"; static int Py_IsWideChar(PyObject *text, Py_ssize_t offs) { const unsigned char *str; Py_UNICODE *ustr; Py_ssize_t ret[2], str_len; if (PyUnicode_Check(text)) //text_py is unicode string { ustr = PyUnicode_AS_UNICODE(text); return (Py_GetWidth((long int)ustr[offs]) == 2); } #ifndef PYTHON3 if (!PyString_Check(text)) { #else if (!PyBytes_Check(text)) { #endif PyErr_SetString(PyExc_TypeError, "is_wide_char: Argument \"text\" is not a string."); return -1; } #ifndef PYTHON3 str = (const unsigned char *)PyString_AsString(text); str_len = (int) PyString_Size(text); #else str = (const unsigned char *)PyBytes_AsString(text); str_len = (int) PyBytes_Size(text); #endif if (byte_encoding == ENC_UTF8) { Py_DecodeOne(str, str_len, offs, ret); return (Py_GetWidth(ret[0]) == 2); } if (byte_encoding == ENC_WIDE) return (Py_WithinDoubleByte(str, offs, offs) == 1); return 0; } static PyObject * is_wide_char(PyObject *self, PyObject *args) { PyObject *text; Py_ssize_t offs; int ret; if (!PyArg_ParseTuple(args, "O" FMT_N, &text, &offs)) return NULL; ret = Py_IsWideChar(text, offs); if ( ret == -1) // error return NULL; return Py_BuildValue("O", to_bool(ret)); } //====================================================================== char move_prev_char_doc[] = "move_prev_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ Return the position of the character before end_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static Py_ssize_t Py_MovePrevChar(PyObject *text, Py_ssize_t start_offs, Py_ssize_t end_offs) { Py_ssize_t position; unsigned char *str; if (PyUnicode_Check(text)) //text_py is unicode string return end_offs-1; else #ifndef PYTHON3 str = (unsigned char *)PyString_AsString(text); #else str = (unsigned char *)PyBytes_AsString(text); #endif if (byte_encoding == ENC_UTF8) //encoding is utf8 { position = end_offs - 1; while ((str[position]&0xc0) == 0x80) position -=1; return position; } else if ((byte_encoding == ENC_WIDE) && (Py_WithinDoubleByte(str, start_offs, end_offs-1) == 2)) return end_offs-2; else return end_offs-1; } static PyObject * move_prev_char(PyObject *self, PyObject *args) { PyObject *text; Py_ssize_t start_offs, end_offs; Py_ssize_t ret; if (!PyArg_ParseTuple(args, "O" FMT_N FMT_N, &text, &start_offs, &end_offs)) return NULL; ret = Py_MovePrevChar(text, start_offs, end_offs); return Py_BuildValue(FMT_N, ret); } //====================================================================== char move_next_char_doc[] = "move_next_char(string/unicode text, int start_offs, int end_offs) -> int pos\n\n\ Return the position of the character after start_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static Py_ssize_t Py_MoveNextChar(PyObject *text, Py_ssize_t start_offs, Py_ssize_t end_offs) { Py_ssize_t position; unsigned char * str; if (PyUnicode_Check(text)) //text_py is unicode string return start_offs+1; else #ifndef PYTHON3 str = (unsigned char *)PyString_AsString(text); #else str = (unsigned char *)PyBytes_AsString(text); #endif if (byte_encoding == ENC_UTF8) //encoding is utf8 { position = start_offs + 1; while ((position < end_offs) && ((str[position]&0xc0) == 0x80)) position +=1; return position; } else if ((byte_encoding == ENC_WIDE) && (Py_WithinDoubleByte(str, start_offs, start_offs) == 1)) return start_offs+2; else return start_offs+1; } static PyObject * move_next_char(PyObject *self, PyObject *args) { PyObject *text; Py_ssize_t start_offs, end_offs; Py_ssize_t ret; if (!PyArg_ParseTuple(args, "O" FMT_N FMT_N, &text, &start_offs, &end_offs)) return NULL; ret = Py_MoveNextChar(text, start_offs, end_offs); return Py_BuildValue(FMT_N, ret); } //====================================================================== char calc_width_doc[] = "calc_width(string/unicode text, int start_off, int end_offs) -> int width\n\n\ Return the screen column width of text between start_offs and end_offs.\n\n\ text -- string or unicode text\n\ start_offs -- start offset\n\ end_offs -- end offset"; static Py_ssize_t Py_CalcWidth(PyObject *text, Py_ssize_t start_offs, Py_ssize_t end_offs) { unsigned char * str; Py_ssize_t i, ret[2], str_len; int screencols; Py_UNICODE *ustr; if (PyUnicode_Check(text)) //text_py is unicode string { ustr = PyUnicode_AS_UNICODE(text); screencols = 0; for(i=start_offs; i pref_col) { ret[0] = i; ret[1] = screencols; return 0; } screencols += width; } ret[0] = i; ret[1] = screencols; return 0; } #ifndef PYTHON3 if (!PyString_Check(text)) #else if (!PyBytes_Check(text)) #endif { PyErr_SetString(PyExc_TypeError, "Neither unicode nor string."); return -1; } #ifndef PYTHON3 str = (unsigned char *)PyString_AsString(text); str_len = (int) PyString_Size(text); #else str = (unsigned char *)PyBytes_AsString(text); str_len = PyBytes_Size(text); #endif if (byte_encoding == ENC_UTF8) { i = start_offs; screencols = 0; while (i pref_col) { ret[0] = i; ret[1] = screencols; return 0; } i = dummy[1]; screencols += width; } ret[0] = i; ret[1] = screencols; return 0; } // "wide" and "narrow" i = start_offs + pref_col; if (i>= end_offs) { ret[0] = end_offs; ret[1] = end_offs - start_offs; return 0; } if (byte_encoding == ENC_WIDE) if (Py_WithinDoubleByte(str, start_offs, i)==2) i -= 1; ret[0] = i; ret[1] = i - start_offs; return 0; } static PyObject * calc_text_pos(PyObject *self, PyObject *args) { PyObject *text; Py_ssize_t start_offs, end_offs, ret[2]; int pref_col, err; if (!PyArg_ParseTuple(args, "O" FMT_N FMT_N "i", &text, &start_offs, &end_offs, &pref_col)) return NULL; err = Py_CalcTextPos(text, start_offs, end_offs, pref_col, ret); if (err==-1) //an error occured return NULL; return Py_BuildValue("(" FMT_N FMT_N ")", ret[0], ret[1]); } //====================================================================== static PyMethodDef Str_UtilMethods[] = { {"get_byte_encoding", get_byte_encoding, METH_VARARGS, get_byte_encoding_doc}, {"set_byte_encoding", set_byte_encoding, METH_VARARGS, set_byte_encoding_doc}, {"get_width", get_width, METH_VARARGS, get_width_doc}, {"decode_one", decode_one, METH_VARARGS, decode_one_doc}, {"decode_one_right", decode_one_right, METH_VARARGS, decode_one_right_doc}, {"within_double_byte", within_double_byte, METH_VARARGS, within_double_byte_doc}, {"is_wide_char", is_wide_char, METH_VARARGS, is_wide_char_doc}, {"move_prev_char", move_prev_char, METH_VARARGS, move_prev_char_doc}, {"move_next_char", move_next_char, METH_VARARGS, move_next_char_doc}, {"calc_width", calc_width, METH_VARARGS, calc_width_doc}, {"calc_text_pos", calc_text_pos, METH_VARARGS, calc_text_pos_doc}, {NULL, NULL, 0, NULL} // Sentinel }; #ifndef PYTHON3 PyMODINIT_FUNC initstr_util(void) { Py_InitModule("str_util", Str_UtilMethods); } int main(int argc, char *argv[]) { //Pass argv[0] to the Python interpreter: Py_SetProgramName(argv[0]); //Initialize the Python interpreter. Py_Initialize(); //Add a static module: initstr_util(); return 0; } #else static struct PyModuleDef Str_UtilModule = { PyModuleDef_HEAD_INIT, "str_util", NULL, -1, Str_UtilMethods }; PyMODINIT_FUNC PyInit_str_util(void) { return PyModule_Create(&Str_UtilModule); } #endif