diff options
Diffstat (limited to 'Objects/unicodeobject.c')
-rw-r--r-- | Objects/unicodeobject.c | 246 |
1 files changed, 231 insertions, 15 deletions
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index e227fc72d2..3052ebd4c9 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -45,6 +45,8 @@ OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. #include "unicodeobject.h" #include "ucnhash.h" +#include "formatter_unicode.h" + #ifdef MS_WINDOWS #include <windows.h> #endif @@ -5009,21 +5011,7 @@ int PyUnicode_EncodeDecimal(Py_UNICODE *s, /* --- Helpers ------------------------------------------------------------ */ -#define STRINGLIB_CHAR Py_UNICODE - -#define STRINGLIB_LEN PyUnicode_GET_SIZE -#define STRINGLIB_NEW PyUnicode_FromUnicode -#define STRINGLIB_STR PyUnicode_AS_UNICODE - -Py_LOCAL_INLINE(int) -STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len) -{ - if (str[0] != other[0]) - return 1; - return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE)); -} - -#define STRINGLIB_EMPTY unicode_empty +#include "stringlib/unicodedefs.h" #include "stringlib/fastsearch.h" @@ -7964,6 +7952,33 @@ unicode_endswith(PyUnicodeObject *self, return PyBool_FromLong(result); } +#include "stringlib/string_format.h" + +PyDoc_STRVAR(format__doc__, +"S.format(*args, **kwargs) -> unicode\n\ +\n\ +"); + +static PyObject * +unicode_format(PyObject *self, PyObject *args, PyObject *kwds) +{ + /* this calls into stringlib/string_format.h because it can be + included for either string or unicode. this is needed for + python 2.6. */ + return do_string_format(self, args, kwds); +} + + +PyDoc_STRVAR(p_format__doc__, +"S.__format__(format_spec) -> unicode\n\ +\n\ +"); + +static PyObject * +unicode__format__(PyObject *self, PyObject *args) +{ + return unicode_unicode__format__(self, args); +} static PyObject * @@ -8019,6 +8034,8 @@ static PyMethodDef unicode_methods[] = { {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__}, {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__}, {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__}, + {"format", (PyCFunction) unicode_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, + {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__}, #if 0 {"capwords", (PyCFunction) unicode_capwords, METH_NOARGS, capwords__doc__}, #endif @@ -9124,6 +9141,205 @@ void _Py_ReleaseInternedUnicodeStrings(void) } +/********************* Formatter Iterator ************************/ + +/* this is used to implement string.Formatter.vparse(). it exists so + Formatter can share code with the built in unicode.format() + method */ + +typedef struct { + PyObject_HEAD + + /* we know this to be a unicode object, but since we just keep + it around to keep the object alive, having it as PyObject + is okay */ + PyObject *str; + + MarkupIterator it_markup; +} formatteriterobject; + +static void +formatteriter_dealloc(formatteriterobject *it) +{ + _PyObject_GC_UNTRACK(it); + Py_XDECREF(it->str); + PyObject_GC_Del(it); +} + +/* returns a tuple: + (is_markup, literal, field_name, format_spec, conversion) + if is_markup == True: + literal is None + field_name is the string before the ':' + format_spec is the string after the ':' + conversion is either None, or the string after the '!' + if is_markup == False: + literal is the literal string + field_name is None + format_spec is None + conversion is None +*/ +static PyObject * +formatteriter_next(formatteriterobject *it) +{ + SubString literal; + SubString field_name; + SubString format_spec; + Py_UNICODE conversion; + int is_markup; + int format_spec_needs_expanding; + int result = MarkupIterator_next(&it->it_markup, &is_markup, &literal, + &field_name, &format_spec, &conversion, + &format_spec_needs_expanding); + + /* all of the SubString objects point into it->str, so no + memory management needs to be done on them */ + + if (result == 0) { + /* error has already been set */ + return NULL; + } else if (result == 1) { + /* end of iterator */ + return NULL; + } else { + PyObject *is_markup_bool = NULL; + PyObject *literal_str = NULL; + PyObject *field_name_str = NULL; + PyObject *format_spec_str = NULL; + PyObject *conversion_str = NULL; + PyObject *result = NULL; + + assert(result == 2); + + is_markup_bool = PyBool_FromLong(is_markup); + if (!is_markup_bool) + goto error; + + if (is_markup) { + /* field_name, format_spec, and conversion are + returned */ + literal_str = Py_None; + Py_INCREF(literal_str); + + field_name_str = SubString_new_object(&field_name); + if (field_name_str == NULL) + goto error; + + format_spec_str = SubString_new_object(&format_spec); + if (format_spec_str == NULL) + goto error; + + /* if the conversion is not specified, return + a None, otherwise create a one length + string with the conversion characater */ + if (conversion == '\0') { + conversion_str = Py_None; + Py_INCREF(conversion_str); + } else + conversion_str = PyUnicode_FromUnicode(&conversion, + 1); + if (conversion_str == NULL) + goto error; + } else { + /* only literal is returned */ + literal_str = SubString_new_object(&literal); + if (literal_str == NULL) + goto error; + + field_name_str = Py_None; + format_spec_str = Py_None; + conversion_str = Py_None; + + Py_INCREF(field_name_str); + Py_INCREF(format_spec_str); + Py_INCREF(conversion_str); + } + /* return a tuple of values */ + result = PyTuple_Pack(5, is_markup_bool, literal_str, + field_name_str, format_spec_str, + conversion_str); + if (result == NULL) + goto error; + + return result; + error: + Py_XDECREF(is_markup_bool); + Py_XDECREF(literal_str); + Py_XDECREF(field_name_str); + Py_XDECREF(format_spec_str); + Py_XDECREF(conversion_str); + Py_XDECREF(result); + return NULL; + } +} + +static PyMethodDef formatteriter_methods[] = { + {NULL, NULL} /* sentinel */ +}; + +PyTypeObject PyFormatterIter_Type = { + PyVarObject_HEAD_INIT(&PyType_Type, 0) + "formatteriterator", /* tp_name */ + sizeof(formatteriterobject), /* tp_basicsize */ + 0, /* tp_itemsize */ + /* methods */ + (destructor)formatteriter_dealloc, /* tp_dealloc */ + 0, /* tp_print */ + 0, /* tp_getattr */ + 0, /* tp_setattr */ + 0, /* tp_compare */ + 0, /* tp_repr */ + 0, /* tp_as_number */ + 0, /* tp_as_sequence */ + 0, /* tp_as_mapping */ + 0, /* tp_hash */ + 0, /* tp_call */ + 0, /* tp_str */ + PyObject_GenericGetAttr, /* tp_getattro */ + 0, /* tp_setattro */ + 0, /* tp_as_buffer */ + Py_TPFLAGS_DEFAULT, /* tp_flags */ + 0, /* tp_doc */ + 0, /* tp_traverse */ + 0, /* tp_clear */ + 0, /* tp_richcompare */ + 0, /* tp_weaklistoffset */ + PyObject_SelfIter, /* tp_iter */ + (iternextfunc)formatteriter_next, /* tp_iternext */ + formatteriter_methods, /* tp_methods */ + 0, +}; + +PyObject * +_unicodeformatter_iterator(PyObject *str) +{ + formatteriterobject *it; + + it = PyObject_GC_New(formatteriterobject, &PyFormatterIter_Type); + if (it == NULL) + return NULL; + + /* take ownership, give the object to the iterator */ + Py_INCREF(str); + it->str = str; + + /* initialize the contained MarkupIterator */ + MarkupIterator_init(&it->it_markup, + PyUnicode_AS_UNICODE(str), + PyUnicode_GET_SIZE(str)); + + _PyObject_GC_TRACK(it); + return (PyObject *)it; +} + +PyObject * +_unicodeformatter_lookup(PyObject *field_name, PyObject *args, + PyObject *kwargs) +{ + return NULL; +} + + /********************* Unicode Iterator **************************/ typedef struct { |