diff options
Diffstat (limited to 'ext/intl/msgformat')
-rw-r--r-- | ext/intl/msgformat/msgformat.c | 2 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_attr.c | 6 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_data.c | 24 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_data.h | 2 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_format.c | 49 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_helpers.cpp | 629 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_helpers.h | 6 | ||||
-rw-r--r-- | ext/intl/msgformat/msgformat_parse.c | 2 |
8 files changed, 607 insertions, 113 deletions
diff --git a/ext/intl/msgformat/msgformat.c b/ext/intl/msgformat/msgformat.c index e3fb9425a9..6a9f04f32b 100644 --- a/ext/intl/msgformat/msgformat.c +++ b/ext/intl/msgformat/msgformat.c @@ -61,7 +61,7 @@ static void msgfmt_ctor(INTERNAL_FUNCTION_PARAMETERS) } if(locale_len == 0) { - locale = INTL_G(default_locale); + locale = intl_locale_get_default(TSRMLS_C); } #ifdef MSG_FORMAT_QUOTE_APOS diff --git a/ext/intl/msgformat/msgformat_attr.c b/ext/intl/msgformat/msgformat_attr.c index ed2dae27d1..c333a24ee1 100644 --- a/ext/intl/msgformat/msgformat_attr.c +++ b/ext/intl/msgformat/msgformat_attr.c @@ -102,6 +102,12 @@ PHP_FUNCTION( msgfmt_set_pattern ) } mfo->mf_data.orig_format = estrndup(value, value_len); mfo->mf_data.orig_format_len = value_len; + /* invalidate cached format types */ + if (mfo->mf_data.arg_types) { + zend_hash_destroy(mfo->mf_data.arg_types); + efree(mfo->mf_data.arg_types); + mfo->mf_data.arg_types = NULL; + } RETURN_TRUE; } diff --git a/ext/intl/msgformat/msgformat_data.c b/ext/intl/msgformat/msgformat_data.c index 527c1d4d17..5d49054473 100644 --- a/ext/intl/msgformat/msgformat_data.c +++ b/ext/intl/msgformat/msgformat_data.c @@ -31,8 +31,10 @@ void msgformat_data_init( msgformat_data* mf_data TSRMLS_DC ) if( !mf_data ) return; - mf_data->umsgf = NULL; - mf_data->orig_format = NULL; + mf_data->umsgf = NULL; + mf_data->orig_format = NULL; + mf_data->arg_types = NULL; + mf_data->tz_set = 0; intl_error_reset( &mf_data->error TSRMLS_CC ); } /* }}} */ @@ -40,21 +42,27 @@ void msgformat_data_init( msgformat_data* mf_data TSRMLS_DC ) /* {{{ void msgformat_data_free( msgformat_data* mf_data ) * Clean up memory allocated for msgformat_data */ -void msgformat_data_free( msgformat_data* mf_data TSRMLS_DC ) +void msgformat_data_free(msgformat_data* mf_data TSRMLS_DC) { - if( !mf_data ) + if (!mf_data) return; - if( mf_data->umsgf ) - umsg_close( mf_data->umsgf ); + if (mf_data->umsgf) + umsg_close(mf_data->umsgf); - if(mf_data->orig_format) { + if (mf_data->orig_format) { efree(mf_data->orig_format); mf_data->orig_format = NULL; } + if (mf_data->arg_types) { + zend_hash_destroy(mf_data->arg_types); + efree(mf_data->arg_types); + mf_data->arg_types = NULL; + } + mf_data->umsgf = NULL; - intl_error_reset( &mf_data->error TSRMLS_CC ); + intl_error_reset(&mf_data->error TSRMLS_CC); } /* }}} */ diff --git a/ext/intl/msgformat/msgformat_data.h b/ext/intl/msgformat/msgformat_data.h index 6479888f8f..51d7687a3a 100644 --- a/ext/intl/msgformat/msgformat_data.h +++ b/ext/intl/msgformat/msgformat_data.h @@ -31,6 +31,8 @@ typedef struct { UMessageFormat* umsgf; char* orig_format; ulong orig_format_len; + HashTable* arg_types; + int tz_set; /* if we've already the time zone in sub-formats */ } msgformat_data; msgformat_data* msgformat_data_create( TSRMLS_D ); diff --git a/ext/intl/msgformat/msgformat_format.c b/ext/intl/msgformat/msgformat_format.c index 9a18ac0a70..4b81cfe2b4 100644 --- a/ext/intl/msgformat/msgformat_format.c +++ b/ext/intl/msgformat/msgformat_format.c @@ -32,51 +32,34 @@ #endif /* {{{ */ -static void msgfmt_do_format(MessageFormatter_object *mfo, zval *args, zval *return_value TSRMLS_DC) +static void msgfmt_do_format(MessageFormatter_object *mfo, zval *args, zval *return_value TSRMLS_DC) { - zval **fargs; int count; UChar* formatted = NULL; int formatted_len = 0; - HashPosition pos; - int i; + HashTable *args_copy; count = zend_hash_num_elements(Z_ARRVAL_P(args)); - if(count < umsg_format_arg_count(MSG_FORMAT_OBJECT(mfo))) { - /* Not enough aguments for format! */ - intl_error_set( INTL_DATA_ERROR_P(mfo), U_ILLEGAL_ARGUMENT_ERROR, - "msgfmt_format: not enough parameters", 0 TSRMLS_CC ); - RETVAL_FALSE; - return; - } - - fargs = safe_emalloc(count, sizeof(zval *), 0); + ALLOC_HASHTABLE(args_copy); + zend_hash_init(args_copy, count, NULL, ZVAL_PTR_DTOR, 0); + zend_hash_copy(args_copy, Z_ARRVAL_P(args), (copy_ctor_func_t)zval_add_ref, + NULL, sizeof(zval*)); - zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(args), &pos); - for(i=0;i<count;i++) { - zval **val; - zend_hash_get_current_data_ex(Z_ARRVAL_P(args), (void **)&val, &pos); - fargs[i] = *val; - Z_ADDREF_P(fargs[i]); - /* TODO: needs refcount increase here? */ - zend_hash_move_forward_ex(Z_ARRVAL_P(args), &pos); - } + umsg_format_helper(mfo, args_copy, &formatted, &formatted_len TSRMLS_CC); - umsg_format_helper(MSG_FORMAT_OBJECT(mfo), count, fargs, &formatted, &formatted_len, &INTL_DATA_ERROR_CODE(mfo) TSRMLS_CC); - - for(i=0;i<count;i++) { - zval_ptr_dtor(&fargs[i]); - } + zend_hash_destroy(args_copy); + efree(args_copy); - efree(fargs); - - if (formatted && U_FAILURE( INTL_DATA_ERROR_CODE(mfo) ) ) { + if (formatted && U_FAILURE(INTL_DATA_ERROR_CODE(mfo))) { efree(formatted); } - INTL_METHOD_CHECK_STATUS( mfo, "Number formatting failed" ); - INTL_METHOD_RETVAL_UTF8( mfo, formatted, formatted_len, 1 ); + if (U_FAILURE(INTL_DATA_ERROR_CODE(mfo))) { + RETURN_FALSE; + } else { + INTL_METHOD_RETVAL_UTF8(mfo, formatted, formatted_len, 1); + } } /* }}} */ @@ -151,7 +134,7 @@ PHP_FUNCTION( msgfmt_format_message ) } if(slocale_len == 0) { - slocale = INTL_G(default_locale); + slocale = intl_locale_get_default(TSRMLS_C); } #ifdef MSG_FORMAT_QUOTE_APOS diff --git a/ext/intl/msgformat/msgformat_helpers.cpp b/ext/intl/msgformat/msgformat_helpers.cpp index 1895de2c86..9ee1cdcfb0 100644 --- a/ext/intl/msgformat/msgformat_helpers.cpp +++ b/ext/intl/msgformat/msgformat_helpers.cpp @@ -18,9 +18,20 @@ #include "config.h" #endif -#include <math.h> +#include "../intl_cppshims.h" + +#include <limits.h> #include <unicode/msgfmt.h> #include <unicode/chariter.h> +#include <unicode/ustdio.h> +#include <unicode/timezone.h> +#include <unicode/datefmt.h> +#include <unicode/calendar.h> + +#include <vector> + +#include "../intl_convertcpp.h" +#include "../common/common_date.h" extern "C" { #include "php_intl.h" @@ -28,8 +39,14 @@ extern "C" { #include "msgformat_format.h" #include "msgformat_helpers.h" #include "intl_convert.h" +#define USE_TIMEZONE_POINTER +#include "../timezone/timezone_class.h" } +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48 +#define HAS_MESSAGE_PATTERN 1 +#endif + U_NAMESPACE_BEGIN /** * This class isolates our access to private internal methods of @@ -40,96 +57,578 @@ class MessageFormatAdapter { public: static const Formattable::Type* getArgTypeList(const MessageFormat& m, int32_t& count); +#ifdef HAS_MESSAGE_PATTERN + static const MessagePattern getMessagePattern(MessageFormat* m); +#endif }; + const Formattable::Type* MessageFormatAdapter::getArgTypeList(const MessageFormat& m, int32_t& count) { return m.getArgTypeList(count); } + +#ifdef HAS_MESSAGE_PATTERN +const MessagePattern +MessageFormatAdapter::getMessagePattern(MessageFormat* m) { + return m->msgPattern; +} +#endif U_NAMESPACE_END -U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt) +U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt) { int32_t fmt_count = 0; MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count); return fmt_count; } -U_CFUNC void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC) +static HashTable *umsg_get_numeric_types(MessageFormatter_object *mfo, + intl_error& err TSRMLS_DC) { - int fmt_count = 0; - const Formattable::Type* argTypes = - MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count); - Formattable* fargs = new Formattable[fmt_count ? fmt_count : 1]; + HashTable *ret; + int32_t parts_count; - for(int32_t i = 0; i < fmt_count; ++i) { - UChar *stringVal = NULL; - int stringLen = 0; - int64_t tInt64 = 0; + if (U_FAILURE(err.code)) { + return NULL; + } - switch(argTypes[i]) { - case Formattable::kDate: - convert_to_long_ex(&args[i]); - fargs[i].setDate(U_MILLIS_PER_SECOND * (double)Z_LVAL_P(args[i])); - break; + if (mfo->mf_data.arg_types) { + /* already cached */ + return mfo->mf_data.arg_types; + } - case Formattable::kDouble: - convert_to_double_ex(&args[i]); - fargs[i].setDouble(Z_DVAL_P(args[i])); - break; - - case Formattable::kLong: - convert_to_long_ex(&args[i]); - fargs[i].setLong(Z_LVAL_P(args[i])); - break; + const Formattable::Type *types = MessageFormatAdapter::getArgTypeList( + *(MessageFormat*)mfo->mf_data.umsgf, parts_count); + + /* Hash table will store Formattable::Type objects directly, + * so no need for destructor */ + ALLOC_HASHTABLE(ret); + zend_hash_init(ret, parts_count, NULL, NULL, 0); + + for (int i = 0; i < parts_count; i++) { + const Formattable::Type t = types[i]; + if (zend_hash_index_update(ret, (ulong)i, (void*)&t, sizeof(t), NULL) + == FAILURE) { + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Write to argument types hash table failed", 0 TSRMLS_CC); + break; + } + } + + if (U_FAILURE(err.code)) { + zend_hash_destroy(ret); + efree(ret); + + return NULL; + } + + mfo->mf_data.arg_types = ret; - case Formattable::kInt64: - if(Z_TYPE_P(args[i]) == IS_DOUBLE) { - tInt64 = (int64_t)Z_DVAL_P(args[i]); - } else if(Z_TYPE_P(args[i]) == IS_LONG) { - tInt64 = (int64_t)Z_LVAL_P(args[i]); + return ret; +} + +#ifdef HAS_MESSAGE_PATTERN +static HashTable *umsg_parse_format(MessageFormatter_object *mfo, + const MessagePattern& mp, + intl_error& err TSRMLS_DC) +{ + HashTable *ret; + int32_t parts_count; + + if (U_FAILURE(err.code)) { + return NULL; + } + + if (!((MessageFormat *)mfo->mf_data.umsgf)->usesNamedArguments()) { + return umsg_get_numeric_types(mfo, err TSRMLS_CC); + } + + if (mfo->mf_data.arg_types) { + /* already cached */ + return mfo->mf_data.arg_types; + } + + /* Hash table will store Formattable::Type objects directly, + * so no need for destructor */ + ALLOC_HASHTABLE(ret); + zend_hash_init(ret, 32, NULL, NULL, 0); + + parts_count = mp.countParts(); + + // See MessageFormat::cacheExplicitFormats() + /* + * Looking through the pattern, go to each arg_start part type. + * The arg-typeof that tells us the argument type (simple, complicated) + * then the next part is either the arg_name or arg number + * and then if it's simple after that there could be a part-type=arg-type + * while substring will tell us number, spellout, etc. + * If the next thing isn't an arg-type then assume string. + */ + /* The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT + * which we need not examine. */ + for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(err.code); i++) { + MessagePattern::Part p = mp.getPart(i); + + if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) { + continue; + } + + MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */ + Formattable::Type type, + *storedType; + + if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) { + UnicodeString argName = mp.getSubstring(name_part); + if (zend_hash_find(ret, (char*)argName.getBuffer(), argName.length(), + (void**)&storedType) == FAILURE) { + /* not found already; create new entry in HT */ + Formattable::Type bogusType = Formattable::kObject; + if (zend_hash_update(ret, (char*)argName.getBuffer(), argName.length(), + (void*)&bogusType, sizeof(bogusType), (void**)&storedType) == FAILURE) { + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Write to argument types hash table failed", 0 TSRMLS_CC); + continue; + } + } + } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) { + int32_t argNumber = name_part.getValue(); + if (argNumber < 0) { + intl_errors_set(&err, U_INVALID_FORMAT_ERROR, + "Found part with negative number", 0 TSRMLS_CC); + continue; + } + if (zend_hash_index_find(ret, (ulong)argNumber, (void**)&storedType) + == FAILURE) { + /* not found already; create new entry in HT */ + Formattable::Type bogusType = Formattable::kObject; + if (zend_hash_index_update(ret, (ulong)argNumber, (void*)&bogusType, + sizeof(bogusType), (void**)&storedType) == FAILURE) { + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Write to argument types hash table failed", 0 TSRMLS_CC); + continue; + } + } + } + + UMessagePatternArgType argType = p.getArgType(); + /* No type specified, treat it as a string */ + if (argType == UMSGPAT_ARG_TYPE_NONE) { + type = Formattable::kString; + } else { /* Some type was specified, might be simple or complicated */ + if (argType == UMSGPAT_ARG_TYPE_SIMPLE) { + /* For a SIMPLE arg, after the name part, there should be + * an ARG_TYPE part whose string value tells us what to do */ + MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */ + if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) { + UnicodeString typeString = mp.getSubstring(type_part); + /* This is all based on the rules in the docs for MessageFormat + * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */ + if (typeString == "number") { + MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */ + if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) { + UnicodeString styleString = mp.getSubstring(style_part); + if (styleString == "integer") { + type = Formattable::kInt64; + } else if (styleString == "currency") { + type = Formattable::kDouble; + } else if (styleString == "percent") { + type = Formattable::kDouble; + } else { /* some style invalid/unknown to us */ + type = Formattable::kDouble; + } + } else { // if missing style, part, make it a double + type = Formattable::kDouble; + } + } else if ((typeString == "date") || (typeString == "time")) { + type = Formattable::kDate; + } else if ((typeString == "spellout") || (typeString == "ordinal") + || (typeString == "duration")) { + type = Formattable::kDouble; + } } else { - SEPARATE_ZVAL_IF_NOT_REF(&args[i]); - convert_scalar_to_number( args[i] TSRMLS_CC ); - tInt64 = (Z_TYPE_P(args[i]) == IS_DOUBLE)?(int64_t)Z_DVAL_P(args[i]):Z_LVAL_P(args[i]); + /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a + * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern + * is broken. */ + intl_errors_set(&err, U_PARSE_ERROR, + "Expected UMSGPAT_PART_TYPE_ARG_TYPE part following " + "UMSGPAT_ARG_TYPE_SIMPLE part", 0 TSRMLS_CC); + continue; + } + } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) { + type = Formattable::kDouble; + } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) { + type = Formattable::kDouble; + } else if (argType == UMSGPAT_ARG_TYPE_SELECT) { + type = Formattable::kString; + } else { + type = Formattable::kString; + } + } /* was type specified? */ + + /* We found a different type for the same arg! */ + if (*storedType != Formattable::kObject && *storedType != type) { + intl_errors_set(&err, U_ARGUMENT_TYPE_MISMATCH, + "Inconsistent types declared for an argument", 0 TSRMLS_CC); + continue; + } + + *storedType = type; + } /* visiting each part */ + + if (U_FAILURE(err.code)) { + zend_hash_destroy(ret); + efree(ret); + + return NULL; + } + + mfo->mf_data.arg_types = ret; + + return ret; +} +#endif + +static HashTable *umsg_get_types(MessageFormatter_object *mfo, + intl_error& err TSRMLS_DC) +{ + MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf; + +#ifdef HAS_MESSAGE_PATTERN + const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf); + + return umsg_parse_format(mfo, mp, err TSRMLS_CC); +#else + if (mf->usesNamedArguments()) { + intl_errors_set(&err, U_UNSUPPORTED_ERROR, + "This extension supports named arguments only on ICU 4.8+", + 0 TSRMLS_CC); + return NULL; + } + return umsg_get_numeric_types(mfo, err TSRMLS_CC); +#endif +} + +static void umsg_set_timezone(MessageFormatter_object *mfo, + intl_error& err TSRMLS_DC) +{ + MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf; + TimeZone *used_tz = NULL; + const Format **formats; + int32_t count; + + /* Unfortanely, this cannot change the time zone for arguments that + * appear inside complex formats because ::getFormats() returns NULL + * for all uncached formats, which is the case for complex formats + * unless they were set via one of the ::setFormat() methods */ + + if (mfo->mf_data.tz_set) { + return; /* already done */ + } + + formats = mf->getFormats(count); + + if (formats == NULL) { + intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR, + "Out of memory retrieving subformats", 0 TSRMLS_CC); + } + + for (int i = 0; U_SUCCESS(err.code) && i < count; i++) { + DateFormat* df = dynamic_cast<DateFormat*>( + const_cast<Format *>(formats[i])); + if (df == NULL) { + continue; + } + + if (used_tz == NULL) { + zval nullzv = zval_used_for_init, + *zvptr = &nullzv; + used_tz = timezone_process_timezone_argument(&zvptr, &err, + "msgfmt_format" TSRMLS_CC); + if (used_tz == NULL) { + continue; + } + } + + df->setTimeZone(*used_tz); + } + + if (U_SUCCESS(err.code)) { + mfo->mf_data.tz_set = 1; + } +} + +U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo, + HashTable *args, + UChar **formatted, + int *formatted_len TSRMLS_DC) +{ + int arg_count = zend_hash_num_elements(args); + std::vector<Formattable> fargs; + std::vector<UnicodeString> farg_names; + MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf; + HashTable *types; + intl_error& err = INTL_DATA_ERROR(mfo); + + if (U_FAILURE(err.code)) { + return; + } + + types = umsg_get_types(mfo, err TSRMLS_CC); + + umsg_set_timezone(mfo, err TSRMLS_CC); + + fargs.resize(arg_count); + farg_names.resize(arg_count); + + int argNum = 0; + HashPosition pos; + zval **elem; + + // Key related variables + int key_type; + char *str_index; + uint str_len; + ulong num_index; + + for (zend_hash_internal_pointer_reset_ex(args, &pos); + U_SUCCESS(err.code) && + (key_type = zend_hash_get_current_key_ex( + args, &str_index, &str_len, &num_index, 0, &pos), + zend_hash_get_current_data_ex(args, (void **)&elem, &pos) + ) == SUCCESS; + zend_hash_move_forward_ex(args, &pos), argNum++) + { + Formattable& formattable = fargs[argNum]; + UnicodeString& key = farg_names[argNum]; + Formattable::Type argType = Formattable::kObject, //unknown + *storedArgType = NULL; + + /* Process key and retrieve type */ + if (key_type == HASH_KEY_IS_LONG) { + /* includes case where index < 0 because it's exposed as unsigned */ + if (num_index > (ulong)INT32_MAX) { + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found negative or too large array key", 0 TSRMLS_CC); + continue; + } + + UChar temp[16]; + int32_t len = u_sprintf(temp, "%u", (uint32_t)num_index); + key.append(temp, len); + + zend_hash_index_find(types, (ulong)num_index, (void**)&storedArgType); + } else { //string; assumed to be in UTF-8 + intl_stringFromChar(key, str_index, str_len-1, &err.code); + + if (U_FAILURE(err.code)) { + char *message; + spprintf(&message, 0, + "Invalid UTF-8 data in argument key: '%s'", str_index); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(message); + continue; + } + + zend_hash_find(types, (char*)key.getBuffer(), key.length(), + (void**)&storedArgType); + } + + if (storedArgType != NULL) { + argType = *storedArgType; + } + + /* Convert zval to formattable according to message format type + * or (as a fallback) the zval type */ + if (argType != Formattable::kObject) { + switch (argType) { + case Formattable::kString: + { + string_arg: + /* This implicitly converts objects + * Note that our vectors will leak if object conversion fails + * and PHP ends up with a fatal error and calls longjmp + * as a result of that. + */ + convert_to_string_ex(elem); + + UnicodeString *text = new UnicodeString(); + intl_stringFromChar(*text, + Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), &err.code); + + if (U_FAILURE(err.code)) { + char *message; + spprintf(&message, 0, "Invalid UTF-8 data in string argument: " + "'%s'", Z_STRVAL_PP(elem)); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(message); + delete text; + continue; + } + formattable.adoptString(text); + break; } - fargs[i].setInt64(tInt64); + case Formattable::kDouble: + { + double d; + if (Z_TYPE_PP(elem) == IS_DOUBLE) { + d = Z_DVAL_PP(elem); + } else if (Z_TYPE_PP(elem) == IS_LONG) { + d = (double)Z_LVAL_PP(elem); + } else { + SEPARATE_ZVAL_IF_NOT_REF(elem); + convert_scalar_to_number(*elem TSRMLS_CC); + d = (Z_TYPE_PP(elem) == IS_DOUBLE) + ? Z_DVAL_PP(elem) + : (double)Z_LVAL_PP(elem); + } + formattable.setDouble(d); + break; + } + case Formattable::kLong: + { + int32_t tInt32; +retry_klong: + if (Z_TYPE_PP(elem) == IS_DOUBLE) { + if (Z_DVAL_PP(elem) > (double)INT32_MAX || + Z_DVAL_PP(elem) < (double)INT32_MIN) { + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found PHP float with absolute value too large for " + "32 bit integer argument", 0 TSRMLS_CC); + } else { + tInt32 = (int32_t)Z_DVAL_PP(elem); + } + } else if (Z_TYPE_PP(elem) == IS_LONG) { + if (Z_LVAL_PP(elem) > INT32_MAX || + Z_LVAL_PP(elem) < INT32_MIN) { + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found PHP integer with absolute value too large " + "for 32 bit integer argument", 0 TSRMLS_CC); + } else { + tInt32 = (int32_t)Z_LVAL_PP(elem); + } + } else { + SEPARATE_ZVAL_IF_NOT_REF(elem); + convert_scalar_to_number(*elem TSRMLS_CC); + goto retry_klong; + } + formattable.setLong(tInt32); + break; + } + case Formattable::kInt64: + { + int64_t tInt64; +retry_kint64: + if (Z_TYPE_PP(elem) == IS_DOUBLE) { + if (Z_DVAL_PP(elem) > (double)U_INT64_MAX || + Z_DVAL_PP(elem) < (double)U_INT64_MIN) { + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found PHP float with absolute value too large for " + "64 bit integer argument", 0 TSRMLS_CC); + } else { + tInt64 = (int64_t)Z_DVAL_PP(elem); + } + } else if (Z_TYPE_PP(elem) == IS_LONG) { + /* assume long is not wider than 64 bits */ + tInt64 = (int64_t)Z_LVAL_PP(elem); + } else { + SEPARATE_ZVAL_IF_NOT_REF(elem); + convert_scalar_to_number(*elem TSRMLS_CC); + goto retry_kint64; + } + formattable.setInt64(tInt64); + break; + } + case Formattable::kDate: + { + double dd = intl_zval_to_millis(*elem, &err, "msgfmt_format" TSRMLS_CC); + if (U_FAILURE(err.code)) { + char *message, *key_char; + int key_len; + UErrorCode status = UErrorCode(); + if (intl_charFromString(key, &key_char, &key_len, + &status) == SUCCESS) { + spprintf(&message, 0, "The argument for key '%s' " + "cannot be used as a date or time", key_char); + intl_errors_set(&err, err.code, message, 1 TSRMLS_CC); + efree(key_char); + efree(message); + } + continue; + } + formattable.setDate(dd); + break; + } + default: + intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR, + "Found unsupported argument type", 0 TSRMLS_CC); + break; + } + } else { + /* We couldn't find any information about the argument in the pattern, this + * means it's an extra argument. So convert it to a number if it's a number or + * bool or null and to a string if it's anything else except arrays . */ + switch (Z_TYPE_PP(elem)) { + case IS_DOUBLE: + formattable.setDouble(Z_DVAL_PP(elem)); break; - - case Formattable::kString: - convert_to_string_ex(&args[i]); - intl_convert_utf8_to_utf16(&stringVal, &stringLen, Z_STRVAL_P(args[i]), Z_STRLEN_P(args[i]), status); - if(U_FAILURE(*status)){ - delete[] fargs; - return; + case IS_BOOL: + convert_to_long_ex(elem); + /* Intentional fallthrough */ + case IS_LONG: + formattable.setInt64((int64_t)Z_LVAL_PP(elem)); + break; + case IS_NULL: + formattable.setInt64((int64_t)0); + break; + case IS_STRING: + case IS_OBJECT: + goto string_arg; + default: + { + char *message, *key_char; + int key_len; + UErrorCode status = UErrorCode(); + if (intl_charFromString(key, &key_char, &key_len, + &status) == SUCCESS) { + spprintf(&message, 0, "No strategy to convert the " + "value given for the argument with key '%s' " + "is available", key_char); + intl_errors_set(&err, + U_ILLEGAL_ARGUMENT_ERROR, message, 1 TSRMLS_CC); + efree(key_char); + efree(message); + } } - fargs[i].setString(stringVal); - efree(stringVal); - break; - - case Formattable::kArray: - case Formattable::kObject: - *status = U_UNSUPPORTED_ERROR; - delete[] fargs; - return; - } + } + } + } // visiting each argument + + if (U_FAILURE(err.code)) { + return; } - UnicodeString resultStr; - FieldPosition fieldPosition(0); - - /* format the message */ - ((const MessageFormat*)fmt)->format(fargs, fmt_count, resultStr, fieldPosition, *status); + UnicodeString resultStr; + FieldPosition fieldPosition(0); - delete[] fargs; + /* format the message */ + mf->format(farg_names.empty() ? NULL : &farg_names[0], + fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, err.code); - if(U_FAILURE(*status)){ - return; - } + if (U_FAILURE(err.code)) { + intl_errors_set(&err, err.code, + "Call to ICU MessageFormat::format() has failed", 0 TSRMLS_CC); + return; + } *formatted_len = resultStr.length(); *formatted = eumalloc(*formatted_len+1); - resultStr.extract(*formatted, *formatted_len+1, *status); + resultStr.extract(*formatted, *formatted_len+1, err.code); + if (U_FAILURE(err.code)) { + intl_errors_set(&err, err.code, + "Error copying format() result", 0 TSRMLS_CC); + return; + } } #define cleanup_zvals() for(int j=i;j>=0;j--) { zval_ptr_dtor((*args)+i); } @@ -154,15 +653,11 @@ U_CFUNC void umsg_parse_helper(UMessageFormat *fmt, int *count, zval ***args, UC int stmp_len; ALLOC_INIT_ZVAL((*args)[i]); - + switch(fargs[i].getType()) { case Formattable::kDate: aDate = ((double)fargs[i].getDate())/U_MILLIS_PER_SECOND; - if(aDate > LONG_MAX || aDate < -LONG_MAX) { - ZVAL_DOUBLE((*args)[i], aDate<0?ceil(aDate):floor(aDate)); - } else { - ZVAL_LONG((*args)[i], (long)aDate); - } + ZVAL_DOUBLE((*args)[i], aDate); break; case Formattable::kDouble: diff --git a/ext/intl/msgformat/msgformat_helpers.h b/ext/intl/msgformat/msgformat_helpers.h index 30c7e3930f..e6eda087d2 100644 --- a/ext/intl/msgformat/msgformat_helpers.h +++ b/ext/intl/msgformat/msgformat_helpers.h @@ -17,9 +17,9 @@ #ifndef MSG_FORMAT_HELPERS_H #define MSG_FORMAT_HELPERS_H -int32_t umsg_format_arg_count(UMessageFormat *fmt); -void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args, - UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC); +int32_t umsg_format_arg_count(UMessageFormat *fmt); +void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args, + UChar **formatted, int *formatted_len TSRMLS_DC); void umsg_parse_helper(UMessageFormat *fmt, int *count, zval ***args, UChar *source, int source_len, UErrorCode *status); #endif // MSG_FORMAT_HELPERS_H diff --git a/ext/intl/msgformat/msgformat_parse.c b/ext/intl/msgformat/msgformat_parse.c index f540b1d0c4..413d3b1f15 100644 --- a/ext/intl/msgformat/msgformat_parse.c +++ b/ext/intl/msgformat/msgformat_parse.c @@ -126,7 +126,7 @@ PHP_FUNCTION( msgfmt_parse_message ) } if(slocale_len == 0) { - slocale = INTL_G(default_locale); + slocale = intl_locale_get_default(TSRMLS_C); } #ifdef MSG_FORMAT_QUOTE_APOS |