summaryrefslogtreecommitdiff
path: root/ext/intl/msgformat
diff options
context:
space:
mode:
Diffstat (limited to 'ext/intl/msgformat')
-rw-r--r--ext/intl/msgformat/msgformat.c2
-rw-r--r--ext/intl/msgformat/msgformat_attr.c6
-rw-r--r--ext/intl/msgformat/msgformat_data.c24
-rw-r--r--ext/intl/msgformat/msgformat_data.h2
-rw-r--r--ext/intl/msgformat/msgformat_format.c49
-rw-r--r--ext/intl/msgformat/msgformat_helpers.cpp629
-rw-r--r--ext/intl/msgformat/msgformat_helpers.h6
-rw-r--r--ext/intl/msgformat/msgformat_parse.c2
8 files changed, 607 insertions, 113 deletions
diff --git a/ext/intl/msgformat/msgformat.c b/ext/intl/msgformat/msgformat.c
index e3fb9425a9..6a9f04f32b 100644
--- a/ext/intl/msgformat/msgformat.c
+++ b/ext/intl/msgformat/msgformat.c
@@ -61,7 +61,7 @@ static void msgfmt_ctor(INTERNAL_FUNCTION_PARAMETERS)
}
if(locale_len == 0) {
- locale = INTL_G(default_locale);
+ locale = intl_locale_get_default(TSRMLS_C);
}
#ifdef MSG_FORMAT_QUOTE_APOS
diff --git a/ext/intl/msgformat/msgformat_attr.c b/ext/intl/msgformat/msgformat_attr.c
index ed2dae27d1..c333a24ee1 100644
--- a/ext/intl/msgformat/msgformat_attr.c
+++ b/ext/intl/msgformat/msgformat_attr.c
@@ -102,6 +102,12 @@ PHP_FUNCTION( msgfmt_set_pattern )
}
mfo->mf_data.orig_format = estrndup(value, value_len);
mfo->mf_data.orig_format_len = value_len;
+ /* invalidate cached format types */
+ if (mfo->mf_data.arg_types) {
+ zend_hash_destroy(mfo->mf_data.arg_types);
+ efree(mfo->mf_data.arg_types);
+ mfo->mf_data.arg_types = NULL;
+ }
RETURN_TRUE;
}
diff --git a/ext/intl/msgformat/msgformat_data.c b/ext/intl/msgformat/msgformat_data.c
index 527c1d4d17..5d49054473 100644
--- a/ext/intl/msgformat/msgformat_data.c
+++ b/ext/intl/msgformat/msgformat_data.c
@@ -31,8 +31,10 @@ void msgformat_data_init( msgformat_data* mf_data TSRMLS_DC )
if( !mf_data )
return;
- mf_data->umsgf = NULL;
- mf_data->orig_format = NULL;
+ mf_data->umsgf = NULL;
+ mf_data->orig_format = NULL;
+ mf_data->arg_types = NULL;
+ mf_data->tz_set = 0;
intl_error_reset( &mf_data->error TSRMLS_CC );
}
/* }}} */
@@ -40,21 +42,27 @@ void msgformat_data_init( msgformat_data* mf_data TSRMLS_DC )
/* {{{ void msgformat_data_free( msgformat_data* mf_data )
* Clean up memory allocated for msgformat_data
*/
-void msgformat_data_free( msgformat_data* mf_data TSRMLS_DC )
+void msgformat_data_free(msgformat_data* mf_data TSRMLS_DC)
{
- if( !mf_data )
+ if (!mf_data)
return;
- if( mf_data->umsgf )
- umsg_close( mf_data->umsgf );
+ if (mf_data->umsgf)
+ umsg_close(mf_data->umsgf);
- if(mf_data->orig_format) {
+ if (mf_data->orig_format) {
efree(mf_data->orig_format);
mf_data->orig_format = NULL;
}
+ if (mf_data->arg_types) {
+ zend_hash_destroy(mf_data->arg_types);
+ efree(mf_data->arg_types);
+ mf_data->arg_types = NULL;
+ }
+
mf_data->umsgf = NULL;
- intl_error_reset( &mf_data->error TSRMLS_CC );
+ intl_error_reset(&mf_data->error TSRMLS_CC);
}
/* }}} */
diff --git a/ext/intl/msgformat/msgformat_data.h b/ext/intl/msgformat/msgformat_data.h
index 6479888f8f..51d7687a3a 100644
--- a/ext/intl/msgformat/msgformat_data.h
+++ b/ext/intl/msgformat/msgformat_data.h
@@ -31,6 +31,8 @@ typedef struct {
UMessageFormat* umsgf;
char* orig_format;
ulong orig_format_len;
+ HashTable* arg_types;
+ int tz_set; /* if we've already the time zone in sub-formats */
} msgformat_data;
msgformat_data* msgformat_data_create( TSRMLS_D );
diff --git a/ext/intl/msgformat/msgformat_format.c b/ext/intl/msgformat/msgformat_format.c
index 9a18ac0a70..4b81cfe2b4 100644
--- a/ext/intl/msgformat/msgformat_format.c
+++ b/ext/intl/msgformat/msgformat_format.c
@@ -32,51 +32,34 @@
#endif
/* {{{ */
-static void msgfmt_do_format(MessageFormatter_object *mfo, zval *args, zval *return_value TSRMLS_DC)
+static void msgfmt_do_format(MessageFormatter_object *mfo, zval *args, zval *return_value TSRMLS_DC)
{
- zval **fargs;
int count;
UChar* formatted = NULL;
int formatted_len = 0;
- HashPosition pos;
- int i;
+ HashTable *args_copy;
count = zend_hash_num_elements(Z_ARRVAL_P(args));
- if(count < umsg_format_arg_count(MSG_FORMAT_OBJECT(mfo))) {
- /* Not enough aguments for format! */
- intl_error_set( INTL_DATA_ERROR_P(mfo), U_ILLEGAL_ARGUMENT_ERROR,
- "msgfmt_format: not enough parameters", 0 TSRMLS_CC );
- RETVAL_FALSE;
- return;
- }
-
- fargs = safe_emalloc(count, sizeof(zval *), 0);
+ ALLOC_HASHTABLE(args_copy);
+ zend_hash_init(args_copy, count, NULL, ZVAL_PTR_DTOR, 0);
+ zend_hash_copy(args_copy, Z_ARRVAL_P(args), (copy_ctor_func_t)zval_add_ref,
+ NULL, sizeof(zval*));
- zend_hash_internal_pointer_reset_ex(Z_ARRVAL_P(args), &pos);
- for(i=0;i<count;i++) {
- zval **val;
- zend_hash_get_current_data_ex(Z_ARRVAL_P(args), (void **)&val, &pos);
- fargs[i] = *val;
- Z_ADDREF_P(fargs[i]);
- /* TODO: needs refcount increase here? */
- zend_hash_move_forward_ex(Z_ARRVAL_P(args), &pos);
- }
+ umsg_format_helper(mfo, args_copy, &formatted, &formatted_len TSRMLS_CC);
- umsg_format_helper(MSG_FORMAT_OBJECT(mfo), count, fargs, &formatted, &formatted_len, &INTL_DATA_ERROR_CODE(mfo) TSRMLS_CC);
-
- for(i=0;i<count;i++) {
- zval_ptr_dtor(&fargs[i]);
- }
+ zend_hash_destroy(args_copy);
+ efree(args_copy);
- efree(fargs);
-
- if (formatted && U_FAILURE( INTL_DATA_ERROR_CODE(mfo) ) ) {
+ if (formatted && U_FAILURE(INTL_DATA_ERROR_CODE(mfo))) {
efree(formatted);
}
- INTL_METHOD_CHECK_STATUS( mfo, "Number formatting failed" );
- INTL_METHOD_RETVAL_UTF8( mfo, formatted, formatted_len, 1 );
+ if (U_FAILURE(INTL_DATA_ERROR_CODE(mfo))) {
+ RETURN_FALSE;
+ } else {
+ INTL_METHOD_RETVAL_UTF8(mfo, formatted, formatted_len, 1);
+ }
}
/* }}} */
@@ -151,7 +134,7 @@ PHP_FUNCTION( msgfmt_format_message )
}
if(slocale_len == 0) {
- slocale = INTL_G(default_locale);
+ slocale = intl_locale_get_default(TSRMLS_C);
}
#ifdef MSG_FORMAT_QUOTE_APOS
diff --git a/ext/intl/msgformat/msgformat_helpers.cpp b/ext/intl/msgformat/msgformat_helpers.cpp
index 1895de2c86..9ee1cdcfb0 100644
--- a/ext/intl/msgformat/msgformat_helpers.cpp
+++ b/ext/intl/msgformat/msgformat_helpers.cpp
@@ -18,9 +18,20 @@
#include "config.h"
#endif
-#include <math.h>
+#include "../intl_cppshims.h"
+
+#include <limits.h>
#include <unicode/msgfmt.h>
#include <unicode/chariter.h>
+#include <unicode/ustdio.h>
+#include <unicode/timezone.h>
+#include <unicode/datefmt.h>
+#include <unicode/calendar.h>
+
+#include <vector>
+
+#include "../intl_convertcpp.h"
+#include "../common/common_date.h"
extern "C" {
#include "php_intl.h"
@@ -28,8 +39,14 @@ extern "C" {
#include "msgformat_format.h"
#include "msgformat_helpers.h"
#include "intl_convert.h"
+#define USE_TIMEZONE_POINTER
+#include "../timezone/timezone_class.h"
}
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
+#define HAS_MESSAGE_PATTERN 1
+#endif
+
U_NAMESPACE_BEGIN
/**
* This class isolates our access to private internal methods of
@@ -40,96 +57,578 @@ class MessageFormatAdapter {
public:
static const Formattable::Type* getArgTypeList(const MessageFormat& m,
int32_t& count);
+#ifdef HAS_MESSAGE_PATTERN
+ static const MessagePattern getMessagePattern(MessageFormat* m);
+#endif
};
+
const Formattable::Type*
MessageFormatAdapter::getArgTypeList(const MessageFormat& m,
int32_t& count) {
return m.getArgTypeList(count);
}
+
+#ifdef HAS_MESSAGE_PATTERN
+const MessagePattern
+MessageFormatAdapter::getMessagePattern(MessageFormat* m) {
+ return m->msgPattern;
+}
+#endif
U_NAMESPACE_END
-U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt)
+U_CFUNC int32_t umsg_format_arg_count(UMessageFormat *fmt)
{
int32_t fmt_count = 0;
MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count);
return fmt_count;
}
-U_CFUNC void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args, UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC)
+static HashTable *umsg_get_numeric_types(MessageFormatter_object *mfo,
+ intl_error& err TSRMLS_DC)
{
- int fmt_count = 0;
- const Formattable::Type* argTypes =
- MessageFormatAdapter::getArgTypeList(*(const MessageFormat*)fmt, fmt_count);
- Formattable* fargs = new Formattable[fmt_count ? fmt_count : 1];
+ HashTable *ret;
+ int32_t parts_count;
- for(int32_t i = 0; i < fmt_count; ++i) {
- UChar *stringVal = NULL;
- int stringLen = 0;
- int64_t tInt64 = 0;
+ if (U_FAILURE(err.code)) {
+ return NULL;
+ }
- switch(argTypes[i]) {
- case Formattable::kDate:
- convert_to_long_ex(&args[i]);
- fargs[i].setDate(U_MILLIS_PER_SECOND * (double)Z_LVAL_P(args[i]));
- break;
+ if (mfo->mf_data.arg_types) {
+ /* already cached */
+ return mfo->mf_data.arg_types;
+ }
- case Formattable::kDouble:
- convert_to_double_ex(&args[i]);
- fargs[i].setDouble(Z_DVAL_P(args[i]));
- break;
-
- case Formattable::kLong:
- convert_to_long_ex(&args[i]);
- fargs[i].setLong(Z_LVAL_P(args[i]));
- break;
+ const Formattable::Type *types = MessageFormatAdapter::getArgTypeList(
+ *(MessageFormat*)mfo->mf_data.umsgf, parts_count);
+
+ /* Hash table will store Formattable::Type objects directly,
+ * so no need for destructor */
+ ALLOC_HASHTABLE(ret);
+ zend_hash_init(ret, parts_count, NULL, NULL, 0);
+
+ for (int i = 0; i < parts_count; i++) {
+ const Formattable::Type t = types[i];
+ if (zend_hash_index_update(ret, (ulong)i, (void*)&t, sizeof(t), NULL)
+ == FAILURE) {
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Write to argument types hash table failed", 0 TSRMLS_CC);
+ break;
+ }
+ }
+
+ if (U_FAILURE(err.code)) {
+ zend_hash_destroy(ret);
+ efree(ret);
+
+ return NULL;
+ }
+
+ mfo->mf_data.arg_types = ret;
- case Formattable::kInt64:
- if(Z_TYPE_P(args[i]) == IS_DOUBLE) {
- tInt64 = (int64_t)Z_DVAL_P(args[i]);
- } else if(Z_TYPE_P(args[i]) == IS_LONG) {
- tInt64 = (int64_t)Z_LVAL_P(args[i]);
+ return ret;
+}
+
+#ifdef HAS_MESSAGE_PATTERN
+static HashTable *umsg_parse_format(MessageFormatter_object *mfo,
+ const MessagePattern& mp,
+ intl_error& err TSRMLS_DC)
+{
+ HashTable *ret;
+ int32_t parts_count;
+
+ if (U_FAILURE(err.code)) {
+ return NULL;
+ }
+
+ if (!((MessageFormat *)mfo->mf_data.umsgf)->usesNamedArguments()) {
+ return umsg_get_numeric_types(mfo, err TSRMLS_CC);
+ }
+
+ if (mfo->mf_data.arg_types) {
+ /* already cached */
+ return mfo->mf_data.arg_types;
+ }
+
+ /* Hash table will store Formattable::Type objects directly,
+ * so no need for destructor */
+ ALLOC_HASHTABLE(ret);
+ zend_hash_init(ret, 32, NULL, NULL, 0);
+
+ parts_count = mp.countParts();
+
+ // See MessageFormat::cacheExplicitFormats()
+ /*
+ * Looking through the pattern, go to each arg_start part type.
+ * The arg-typeof that tells us the argument type (simple, complicated)
+ * then the next part is either the arg_name or arg number
+ * and then if it's simple after that there could be a part-type=arg-type
+ * while substring will tell us number, spellout, etc.
+ * If the next thing isn't an arg-type then assume string.
+ */
+ /* The last two "parts" can at most be ARG_LIMIT and MSG_LIMIT
+ * which we need not examine. */
+ for (int32_t i = 0; i < parts_count - 2 && U_SUCCESS(err.code); i++) {
+ MessagePattern::Part p = mp.getPart(i);
+
+ if (p.getType() != UMSGPAT_PART_TYPE_ARG_START) {
+ continue;
+ }
+
+ MessagePattern::Part name_part = mp.getPart(++i); /* Getting name, advancing i */
+ Formattable::Type type,
+ *storedType;
+
+ if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NAME) {
+ UnicodeString argName = mp.getSubstring(name_part);
+ if (zend_hash_find(ret, (char*)argName.getBuffer(), argName.length(),
+ (void**)&storedType) == FAILURE) {
+ /* not found already; create new entry in HT */
+ Formattable::Type bogusType = Formattable::kObject;
+ if (zend_hash_update(ret, (char*)argName.getBuffer(), argName.length(),
+ (void*)&bogusType, sizeof(bogusType), (void**)&storedType) == FAILURE) {
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Write to argument types hash table failed", 0 TSRMLS_CC);
+ continue;
+ }
+ }
+ } else if (name_part.getType() == UMSGPAT_PART_TYPE_ARG_NUMBER) {
+ int32_t argNumber = name_part.getValue();
+ if (argNumber < 0) {
+ intl_errors_set(&err, U_INVALID_FORMAT_ERROR,
+ "Found part with negative number", 0 TSRMLS_CC);
+ continue;
+ }
+ if (zend_hash_index_find(ret, (ulong)argNumber, (void**)&storedType)
+ == FAILURE) {
+ /* not found already; create new entry in HT */
+ Formattable::Type bogusType = Formattable::kObject;
+ if (zend_hash_index_update(ret, (ulong)argNumber, (void*)&bogusType,
+ sizeof(bogusType), (void**)&storedType) == FAILURE) {
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Write to argument types hash table failed", 0 TSRMLS_CC);
+ continue;
+ }
+ }
+ }
+
+ UMessagePatternArgType argType = p.getArgType();
+ /* No type specified, treat it as a string */
+ if (argType == UMSGPAT_ARG_TYPE_NONE) {
+ type = Formattable::kString;
+ } else { /* Some type was specified, might be simple or complicated */
+ if (argType == UMSGPAT_ARG_TYPE_SIMPLE) {
+ /* For a SIMPLE arg, after the name part, there should be
+ * an ARG_TYPE part whose string value tells us what to do */
+ MessagePattern::Part type_part = mp.getPart(++i); /* Getting type, advancing i */
+ if (type_part.getType() == UMSGPAT_PART_TYPE_ARG_TYPE) {
+ UnicodeString typeString = mp.getSubstring(type_part);
+ /* This is all based on the rules in the docs for MessageFormat
+ * @see http://icu-project.org/apiref/icu4c/classMessageFormat.html */
+ if (typeString == "number") {
+ MessagePattern::Part style_part = mp.getPart(i + 1); /* Not advancing i */
+ if (style_part.getType() == UMSGPAT_PART_TYPE_ARG_STYLE) {
+ UnicodeString styleString = mp.getSubstring(style_part);
+ if (styleString == "integer") {
+ type = Formattable::kInt64;
+ } else if (styleString == "currency") {
+ type = Formattable::kDouble;
+ } else if (styleString == "percent") {
+ type = Formattable::kDouble;
+ } else { /* some style invalid/unknown to us */
+ type = Formattable::kDouble;
+ }
+ } else { // if missing style, part, make it a double
+ type = Formattable::kDouble;
+ }
+ } else if ((typeString == "date") || (typeString == "time")) {
+ type = Formattable::kDate;
+ } else if ((typeString == "spellout") || (typeString == "ordinal")
+ || (typeString == "duration")) {
+ type = Formattable::kDouble;
+ }
} else {
- SEPARATE_ZVAL_IF_NOT_REF(&args[i]);
- convert_scalar_to_number( args[i] TSRMLS_CC );
- tInt64 = (Z_TYPE_P(args[i]) == IS_DOUBLE)?(int64_t)Z_DVAL_P(args[i]):Z_LVAL_P(args[i]);
+ /* If there's no UMSGPAT_PART_TYPE_ARG_TYPE right after a
+ * UMSGPAT_ARG_TYPE_SIMPLE argument, then the pattern
+ * is broken. */
+ intl_errors_set(&err, U_PARSE_ERROR,
+ "Expected UMSGPAT_PART_TYPE_ARG_TYPE part following "
+ "UMSGPAT_ARG_TYPE_SIMPLE part", 0 TSRMLS_CC);
+ continue;
+ }
+ } else if (argType == UMSGPAT_ARG_TYPE_PLURAL) {
+ type = Formattable::kDouble;
+ } else if (argType == UMSGPAT_ARG_TYPE_CHOICE) {
+ type = Formattable::kDouble;
+ } else if (argType == UMSGPAT_ARG_TYPE_SELECT) {
+ type = Formattable::kString;
+ } else {
+ type = Formattable::kString;
+ }
+ } /* was type specified? */
+
+ /* We found a different type for the same arg! */
+ if (*storedType != Formattable::kObject && *storedType != type) {
+ intl_errors_set(&err, U_ARGUMENT_TYPE_MISMATCH,
+ "Inconsistent types declared for an argument", 0 TSRMLS_CC);
+ continue;
+ }
+
+ *storedType = type;
+ } /* visiting each part */
+
+ if (U_FAILURE(err.code)) {
+ zend_hash_destroy(ret);
+ efree(ret);
+
+ return NULL;
+ }
+
+ mfo->mf_data.arg_types = ret;
+
+ return ret;
+}
+#endif
+
+static HashTable *umsg_get_types(MessageFormatter_object *mfo,
+ intl_error& err TSRMLS_DC)
+{
+ MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf;
+
+#ifdef HAS_MESSAGE_PATTERN
+ const MessagePattern mp = MessageFormatAdapter::getMessagePattern(mf);
+
+ return umsg_parse_format(mfo, mp, err TSRMLS_CC);
+#else
+ if (mf->usesNamedArguments()) {
+ intl_errors_set(&err, U_UNSUPPORTED_ERROR,
+ "This extension supports named arguments only on ICU 4.8+",
+ 0 TSRMLS_CC);
+ return NULL;
+ }
+ return umsg_get_numeric_types(mfo, err TSRMLS_CC);
+#endif
+}
+
+static void umsg_set_timezone(MessageFormatter_object *mfo,
+ intl_error& err TSRMLS_DC)
+{
+ MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf;
+ TimeZone *used_tz = NULL;
+ const Format **formats;
+ int32_t count;
+
+ /* Unfortanely, this cannot change the time zone for arguments that
+ * appear inside complex formats because ::getFormats() returns NULL
+ * for all uncached formats, which is the case for complex formats
+ * unless they were set via one of the ::setFormat() methods */
+
+ if (mfo->mf_data.tz_set) {
+ return; /* already done */
+ }
+
+ formats = mf->getFormats(count);
+
+ if (formats == NULL) {
+ intl_errors_set(&err, U_MEMORY_ALLOCATION_ERROR,
+ "Out of memory retrieving subformats", 0 TSRMLS_CC);
+ }
+
+ for (int i = 0; U_SUCCESS(err.code) && i < count; i++) {
+ DateFormat* df = dynamic_cast<DateFormat*>(
+ const_cast<Format *>(formats[i]));
+ if (df == NULL) {
+ continue;
+ }
+
+ if (used_tz == NULL) {
+ zval nullzv = zval_used_for_init,
+ *zvptr = &nullzv;
+ used_tz = timezone_process_timezone_argument(&zvptr, &err,
+ "msgfmt_format" TSRMLS_CC);
+ if (used_tz == NULL) {
+ continue;
+ }
+ }
+
+ df->setTimeZone(*used_tz);
+ }
+
+ if (U_SUCCESS(err.code)) {
+ mfo->mf_data.tz_set = 1;
+ }
+}
+
+U_CFUNC void umsg_format_helper(MessageFormatter_object *mfo,
+ HashTable *args,
+ UChar **formatted,
+ int *formatted_len TSRMLS_DC)
+{
+ int arg_count = zend_hash_num_elements(args);
+ std::vector<Formattable> fargs;
+ std::vector<UnicodeString> farg_names;
+ MessageFormat *mf = (MessageFormat *)mfo->mf_data.umsgf;
+ HashTable *types;
+ intl_error& err = INTL_DATA_ERROR(mfo);
+
+ if (U_FAILURE(err.code)) {
+ return;
+ }
+
+ types = umsg_get_types(mfo, err TSRMLS_CC);
+
+ umsg_set_timezone(mfo, err TSRMLS_CC);
+
+ fargs.resize(arg_count);
+ farg_names.resize(arg_count);
+
+ int argNum = 0;
+ HashPosition pos;
+ zval **elem;
+
+ // Key related variables
+ int key_type;
+ char *str_index;
+ uint str_len;
+ ulong num_index;
+
+ for (zend_hash_internal_pointer_reset_ex(args, &pos);
+ U_SUCCESS(err.code) &&
+ (key_type = zend_hash_get_current_key_ex(
+ args, &str_index, &str_len, &num_index, 0, &pos),
+ zend_hash_get_current_data_ex(args, (void **)&elem, &pos)
+ ) == SUCCESS;
+ zend_hash_move_forward_ex(args, &pos), argNum++)
+ {
+ Formattable& formattable = fargs[argNum];
+ UnicodeString& key = farg_names[argNum];
+ Formattable::Type argType = Formattable::kObject, //unknown
+ *storedArgType = NULL;
+
+ /* Process key and retrieve type */
+ if (key_type == HASH_KEY_IS_LONG) {
+ /* includes case where index < 0 because it's exposed as unsigned */
+ if (num_index > (ulong)INT32_MAX) {
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found negative or too large array key", 0 TSRMLS_CC);
+ continue;
+ }
+
+ UChar temp[16];
+ int32_t len = u_sprintf(temp, "%u", (uint32_t)num_index);
+ key.append(temp, len);
+
+ zend_hash_index_find(types, (ulong)num_index, (void**)&storedArgType);
+ } else { //string; assumed to be in UTF-8
+ intl_stringFromChar(key, str_index, str_len-1, &err.code);
+
+ if (U_FAILURE(err.code)) {
+ char *message;
+ spprintf(&message, 0,
+ "Invalid UTF-8 data in argument key: '%s'", str_index);
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(message);
+ continue;
+ }
+
+ zend_hash_find(types, (char*)key.getBuffer(), key.length(),
+ (void**)&storedArgType);
+ }
+
+ if (storedArgType != NULL) {
+ argType = *storedArgType;
+ }
+
+ /* Convert zval to formattable according to message format type
+ * or (as a fallback) the zval type */
+ if (argType != Formattable::kObject) {
+ switch (argType) {
+ case Formattable::kString:
+ {
+ string_arg:
+ /* This implicitly converts objects
+ * Note that our vectors will leak if object conversion fails
+ * and PHP ends up with a fatal error and calls longjmp
+ * as a result of that.
+ */
+ convert_to_string_ex(elem);
+
+ UnicodeString *text = new UnicodeString();
+ intl_stringFromChar(*text,
+ Z_STRVAL_PP(elem), Z_STRLEN_PP(elem), &err.code);
+
+ if (U_FAILURE(err.code)) {
+ char *message;
+ spprintf(&message, 0, "Invalid UTF-8 data in string argument: "
+ "'%s'", Z_STRVAL_PP(elem));
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(message);
+ delete text;
+ continue;
+ }
+ formattable.adoptString(text);
+ break;
}
- fargs[i].setInt64(tInt64);
+ case Formattable::kDouble:
+ {
+ double d;
+ if (Z_TYPE_PP(elem) == IS_DOUBLE) {
+ d = Z_DVAL_PP(elem);
+ } else if (Z_TYPE_PP(elem) == IS_LONG) {
+ d = (double)Z_LVAL_PP(elem);
+ } else {
+ SEPARATE_ZVAL_IF_NOT_REF(elem);
+ convert_scalar_to_number(*elem TSRMLS_CC);
+ d = (Z_TYPE_PP(elem) == IS_DOUBLE)
+ ? Z_DVAL_PP(elem)
+ : (double)Z_LVAL_PP(elem);
+ }
+ formattable.setDouble(d);
+ break;
+ }
+ case Formattable::kLong:
+ {
+ int32_t tInt32;
+retry_klong:
+ if (Z_TYPE_PP(elem) == IS_DOUBLE) {
+ if (Z_DVAL_PP(elem) > (double)INT32_MAX ||
+ Z_DVAL_PP(elem) < (double)INT32_MIN) {
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found PHP float with absolute value too large for "
+ "32 bit integer argument", 0 TSRMLS_CC);
+ } else {
+ tInt32 = (int32_t)Z_DVAL_PP(elem);
+ }
+ } else if (Z_TYPE_PP(elem) == IS_LONG) {
+ if (Z_LVAL_PP(elem) > INT32_MAX ||
+ Z_LVAL_PP(elem) < INT32_MIN) {
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found PHP integer with absolute value too large "
+ "for 32 bit integer argument", 0 TSRMLS_CC);
+ } else {
+ tInt32 = (int32_t)Z_LVAL_PP(elem);
+ }
+ } else {
+ SEPARATE_ZVAL_IF_NOT_REF(elem);
+ convert_scalar_to_number(*elem TSRMLS_CC);
+ goto retry_klong;
+ }
+ formattable.setLong(tInt32);
+ break;
+ }
+ case Formattable::kInt64:
+ {
+ int64_t tInt64;
+retry_kint64:
+ if (Z_TYPE_PP(elem) == IS_DOUBLE) {
+ if (Z_DVAL_PP(elem) > (double)U_INT64_MAX ||
+ Z_DVAL_PP(elem) < (double)U_INT64_MIN) {
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found PHP float with absolute value too large for "
+ "64 bit integer argument", 0 TSRMLS_CC);
+ } else {
+ tInt64 = (int64_t)Z_DVAL_PP(elem);
+ }
+ } else if (Z_TYPE_PP(elem) == IS_LONG) {
+ /* assume long is not wider than 64 bits */
+ tInt64 = (int64_t)Z_LVAL_PP(elem);
+ } else {
+ SEPARATE_ZVAL_IF_NOT_REF(elem);
+ convert_scalar_to_number(*elem TSRMLS_CC);
+ goto retry_kint64;
+ }
+ formattable.setInt64(tInt64);
+ break;
+ }
+ case Formattable::kDate:
+ {
+ double dd = intl_zval_to_millis(*elem, &err, "msgfmt_format" TSRMLS_CC);
+ if (U_FAILURE(err.code)) {
+ char *message, *key_char;
+ int key_len;
+ UErrorCode status = UErrorCode();
+ if (intl_charFromString(key, &key_char, &key_len,
+ &status) == SUCCESS) {
+ spprintf(&message, 0, "The argument for key '%s' "
+ "cannot be used as a date or time", key_char);
+ intl_errors_set(&err, err.code, message, 1 TSRMLS_CC);
+ efree(key_char);
+ efree(message);
+ }
+ continue;
+ }
+ formattable.setDate(dd);
+ break;
+ }
+ default:
+ intl_errors_set(&err, U_ILLEGAL_ARGUMENT_ERROR,
+ "Found unsupported argument type", 0 TSRMLS_CC);
+ break;
+ }
+ } else {
+ /* We couldn't find any information about the argument in the pattern, this
+ * means it's an extra argument. So convert it to a number if it's a number or
+ * bool or null and to a string if it's anything else except arrays . */
+ switch (Z_TYPE_PP(elem)) {
+ case IS_DOUBLE:
+ formattable.setDouble(Z_DVAL_PP(elem));
break;
-
- case Formattable::kString:
- convert_to_string_ex(&args[i]);
- intl_convert_utf8_to_utf16(&stringVal, &stringLen, Z_STRVAL_P(args[i]), Z_STRLEN_P(args[i]), status);
- if(U_FAILURE(*status)){
- delete[] fargs;
- return;
+ case IS_BOOL:
+ convert_to_long_ex(elem);
+ /* Intentional fallthrough */
+ case IS_LONG:
+ formattable.setInt64((int64_t)Z_LVAL_PP(elem));
+ break;
+ case IS_NULL:
+ formattable.setInt64((int64_t)0);
+ break;
+ case IS_STRING:
+ case IS_OBJECT:
+ goto string_arg;
+ default:
+ {
+ char *message, *key_char;
+ int key_len;
+ UErrorCode status = UErrorCode();
+ if (intl_charFromString(key, &key_char, &key_len,
+ &status) == SUCCESS) {
+ spprintf(&message, 0, "No strategy to convert the "
+ "value given for the argument with key '%s' "
+ "is available", key_char);
+ intl_errors_set(&err,
+ U_ILLEGAL_ARGUMENT_ERROR, message, 1 TSRMLS_CC);
+ efree(key_char);
+ efree(message);
+ }
}
- fargs[i].setString(stringVal);
- efree(stringVal);
- break;
-
- case Formattable::kArray:
- case Formattable::kObject:
- *status = U_UNSUPPORTED_ERROR;
- delete[] fargs;
- return;
- }
+ }
+ }
+ } // visiting each argument
+
+ if (U_FAILURE(err.code)) {
+ return;
}
- UnicodeString resultStr;
- FieldPosition fieldPosition(0);
-
- /* format the message */
- ((const MessageFormat*)fmt)->format(fargs, fmt_count, resultStr, fieldPosition, *status);
+ UnicodeString resultStr;
+ FieldPosition fieldPosition(0);
- delete[] fargs;
+ /* format the message */
+ mf->format(farg_names.empty() ? NULL : &farg_names[0],
+ fargs.empty() ? NULL : &fargs[0], arg_count, resultStr, err.code);
- if(U_FAILURE(*status)){
- return;
- }
+ if (U_FAILURE(err.code)) {
+ intl_errors_set(&err, err.code,
+ "Call to ICU MessageFormat::format() has failed", 0 TSRMLS_CC);
+ return;
+ }
*formatted_len = resultStr.length();
*formatted = eumalloc(*formatted_len+1);
- resultStr.extract(*formatted, *formatted_len+1, *status);
+ resultStr.extract(*formatted, *formatted_len+1, err.code);
+ if (U_FAILURE(err.code)) {
+ intl_errors_set(&err, err.code,
+ "Error copying format() result", 0 TSRMLS_CC);
+ return;
+ }
}
#define cleanup_zvals() for(int j=i;j>=0;j--) { zval_ptr_dtor((*args)+i); }
@@ -154,15 +653,11 @@ U_CFUNC void umsg_parse_helper(UMessageFormat *fmt, int *count, zval ***args, UC
int stmp_len;
ALLOC_INIT_ZVAL((*args)[i]);
-
+
switch(fargs[i].getType()) {
case Formattable::kDate:
aDate = ((double)fargs[i].getDate())/U_MILLIS_PER_SECOND;
- if(aDate > LONG_MAX || aDate < -LONG_MAX) {
- ZVAL_DOUBLE((*args)[i], aDate<0?ceil(aDate):floor(aDate));
- } else {
- ZVAL_LONG((*args)[i], (long)aDate);
- }
+ ZVAL_DOUBLE((*args)[i], aDate);
break;
case Formattable::kDouble:
diff --git a/ext/intl/msgformat/msgformat_helpers.h b/ext/intl/msgformat/msgformat_helpers.h
index 30c7e3930f..e6eda087d2 100644
--- a/ext/intl/msgformat/msgformat_helpers.h
+++ b/ext/intl/msgformat/msgformat_helpers.h
@@ -17,9 +17,9 @@
#ifndef MSG_FORMAT_HELPERS_H
#define MSG_FORMAT_HELPERS_H
-int32_t umsg_format_arg_count(UMessageFormat *fmt);
-void umsg_format_helper(UMessageFormat *fmt, int arg_count, zval **args,
- UChar **formatted, int *formatted_len, UErrorCode *status TSRMLS_DC);
+int32_t umsg_format_arg_count(UMessageFormat *fmt);
+void umsg_format_helper(MessageFormatter_object *mfo, HashTable *args,
+ UChar **formatted, int *formatted_len TSRMLS_DC);
void umsg_parse_helper(UMessageFormat *fmt, int *count, zval ***args,
UChar *source, int source_len, UErrorCode *status);
#endif // MSG_FORMAT_HELPERS_H
diff --git a/ext/intl/msgformat/msgformat_parse.c b/ext/intl/msgformat/msgformat_parse.c
index f540b1d0c4..413d3b1f15 100644
--- a/ext/intl/msgformat/msgformat_parse.c
+++ b/ext/intl/msgformat/msgformat_parse.c
@@ -126,7 +126,7 @@ PHP_FUNCTION( msgfmt_parse_message )
}
if(slocale_len == 0) {
- slocale = INTL_G(default_locale);
+ slocale = intl_locale_get_default(TSRMLS_C);
}
#ifdef MSG_FORMAT_QUOTE_APOS