diff options
author | Gustavo André dos Santos Lopes <cataphract@php.net> | 2012-06-25 12:11:49 +0200 |
---|---|---|
committer | Gustavo André dos Santos Lopes <cataphract@php.net> | 2012-06-25 12:11:49 +0200 |
commit | 75964450ed12778ca06e2789124d87fc8b6f3945 (patch) | |
tree | c649a62516de424f187eaa2317ebd132851caac0 | |
parent | 715e59ad82862785261dcf91570583eda9fef081 (diff) | |
parent | 0df73a85e19d71612ab3a0ba03061123453ff2e3 (diff) | |
download | php-git-75964450ed12778ca06e2789124d87fc8b6f3945.tar.gz |
Merge branch 'break_iterator'
* break_iterator:
Fix typo in error message
BreakIterator: fix compat with old ICU versions
Fix build error one ext/intl
BreakIterator::getPartsIterator: new optional arg
Added IntlCodePointBreakIterator.
Add Intl prefix to BreakIterator/RuleBasedBI
Remove trailing space
Replaced zend_parse_method_params with plain zpp
BreakIter: Removed getAvailableLocales/getHashCode
Change in BreakIterator::getPartsIterator()
BreakIterator: add rules status constants
Tests for (RuleBased)BreakIterator.
BreakIterator and RuleBasedBreakiterator added
57 files changed, 3375 insertions, 122 deletions
diff --git a/ext/intl/breakiterator/breakiterator_class.cpp b/ext/intl/breakiterator/breakiterator_class.cpp new file mode 100644 index 0000000000..de4bfbb7b0 --- /dev/null +++ b/ext/intl/breakiterator/breakiterator_class.cpp @@ -0,0 +1,397 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <unicode/brkiter.h> +#include <unicode/rbbi.h> +#include "codepointiterator_internal.h" + +#include "breakiterator_iterators.h" + +#include <typeinfo> + +extern "C" { +#define USE_BREAKITERATOR_POINTER 1 +#include "breakiterator_class.h" +#include "breakiterator_methods.h" +#include "rulebasedbreakiterator_methods.h" +#include "codepointiterator_methods.h" +#include <zend_exceptions.h> +#include <zend_interfaces.h> +#include <assert.h> +} + +using PHP::CodePointBreakIterator; + +/* {{{ Global variables */ +zend_class_entry *BreakIterator_ce_ptr; +zend_class_entry *RuleBasedBreakIterator_ce_ptr; +zend_class_entry *CodePointBreakIterator_ce_ptr; +zend_object_handlers BreakIterator_handlers; +/* }}} */ + +U_CFUNC void breakiterator_object_create(zval *object, + BreakIterator *biter TSRMLS_DC) +{ + UClassID classId = biter->getDynamicClassID(); + 
zend_class_entry *ce; + + if (classId == RuleBasedBreakIterator::getStaticClassID()) { + ce = RuleBasedBreakIterator_ce_ptr; + } else if (classId == CodePointBreakIterator::getStaticClassID()) { + ce = CodePointBreakIterator_ce_ptr; + } else { + ce = BreakIterator_ce_ptr; + } + + object_init_ex(object, ce); + breakiterator_object_construct(object, biter TSRMLS_CC); +} + +U_CFUNC void breakiterator_object_construct(zval *object, + BreakIterator *biter TSRMLS_DC) +{ + BreakIterator_object *bio; + + BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; //populate to from object + assert(bio->biter == NULL); + bio->biter = biter; +} + +/* {{{ compare handler for BreakIterator */ +static int BreakIterator_compare_objects(zval *object1, + zval *object2 TSRMLS_DC) +{ + BreakIterator_object *bio1, + *bio2; + + bio1 = (BreakIterator_object*)zend_object_store_get_object(object1 TSRMLS_CC); + bio2 = (BreakIterator_object*)zend_object_store_get_object(object2 TSRMLS_CC); + + if (bio1->biter == NULL || bio2->biter == NULL) { + return bio1->biter == bio2->biter ? 0 : 1; + } + + return *bio1->biter == *bio2->biter ? 
0 : 1; +} +/* }}} */ + +/* {{{ clone handler for BreakIterator */ +static zend_object_value BreakIterator_clone_obj(zval *object TSRMLS_DC) +{ + BreakIterator_object *bio_orig, + *bio_new; + zend_object_value ret_val; + + bio_orig = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + intl_errors_reset(INTL_DATA_ERROR_P(bio_orig) TSRMLS_CC); + + ret_val = BreakIterator_ce_ptr->create_object(Z_OBJCE_P(object) TSRMLS_CC); + bio_new = (BreakIterator_object*)zend_object_store_get_object_by_handle( + ret_val.handle TSRMLS_CC); + + zend_objects_clone_members(&bio_new->zo, ret_val, + &bio_orig->zo, Z_OBJ_HANDLE_P(object) TSRMLS_CC); + + if (bio_orig->biter != NULL) { + BreakIterator *new_biter; + + new_biter = bio_orig->biter->clone(); + if (!new_biter) { + char *err_msg; + intl_errors_set_code(BREAKITER_ERROR_P(bio_orig), + U_MEMORY_ALLOCATION_ERROR TSRMLS_CC); + intl_errors_set_custom_msg(BREAKITER_ERROR_P(bio_orig), + "Could not clone BreakIterator", 0 TSRMLS_CC); + err_msg = intl_error_get_message(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC); + zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC); + efree(err_msg); + } else { + bio_new->biter = new_biter; + bio_new->text = bio_orig->text; + if (bio_new->text) { + zval_add_ref(&bio_new->text); + } + } + } else { + zend_throw_exception(NULL, "Cannot clone unconstructed BreakIterator", 0 TSRMLS_CC); + } + + return ret_val; +} +/* }}} */ + +/* {{{ get_debug_info handler for BreakIterator */ +static HashTable *BreakIterator_get_debug_info(zval *object, int *is_temp TSRMLS_DC) +{ + zval zv = zval_used_for_init; + BreakIterator_object *bio; + const BreakIterator *biter; + + *is_temp = 1; + + array_init_size(&zv, 8); + + bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + biter = bio->biter; + + if (biter == NULL) { + add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 0); + return Z_ARRVAL(zv); + } + add_assoc_bool_ex(&zv, "valid", sizeof("valid"), 1); + + if (bio->text == NULL) { + 
add_assoc_null_ex(&zv, "text", sizeof("text")); + } else { + zval_add_ref(&bio->text); + add_assoc_zval_ex(&zv, "text", sizeof("text"), bio->text); + } + + add_assoc_string_ex(&zv, "type", sizeof("type"), + const_cast<char*>(typeid(*biter).name()), 1); + + return Z_ARRVAL(zv); +} +/* }}} */ + +/* {{{ void breakiterator_object_init(BreakIterator_object* to) + * Initialize internals of BreakIterator_object not specific to zend standard objects. + */ +static void breakiterator_object_init(BreakIterator_object *bio TSRMLS_DC) +{ + intl_error_init(BREAKITER_ERROR_P(bio) TSRMLS_CC); + bio->biter = NULL; + bio->text = NULL; +} +/* }}} */ + +/* {{{ BreakIterator_objects_dtor */ +static void BreakIterator_objects_dtor(void *object, + zend_object_handle handle TSRMLS_DC) +{ + zend_objects_destroy_object((zend_object*)object, handle TSRMLS_CC); +} +/* }}} */ + +/* {{{ BreakIterator_objects_free */ +static void BreakIterator_objects_free(zend_object *object TSRMLS_DC) +{ + BreakIterator_object* bio = (BreakIterator_object*) object; + + if (bio->text) { + zval_ptr_dtor(&bio->text); + } + if (bio->biter) { + delete bio->biter; + bio->biter = NULL; + } + intl_error_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC); + + zend_object_std_dtor(&bio->zo TSRMLS_CC); + + efree(bio); +} +/* }}} */ + +/* {{{ BreakIterator_object_create */ +static zend_object_value BreakIterator_object_create(zend_class_entry *ce TSRMLS_DC) +{ + zend_object_value retval; + BreakIterator_object* intern; + + intern = (BreakIterator_object*)ecalloc(1, sizeof(BreakIterator_object)); + + zend_object_std_init(&intern->zo, ce TSRMLS_CC); +#if PHP_VERSION_ID < 50399 + zend_hash_copy(intern->zo.properties, &(ce->default_properties), + (copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval*)); +#else + object_properties_init((zend_object*) intern, ce); +#endif + breakiterator_object_init(intern TSRMLS_CC); + + retval.handle = zend_objects_store_put( + intern, + BreakIterator_objects_dtor, + (zend_objects_free_object_storage_t) 
BreakIterator_objects_free, + NULL TSRMLS_CC); + + retval.handlers = &BreakIterator_handlers; + + return retval; +} +/* }}} */ + +/* {{{ BreakIterator/RuleBasedBreakIterator methods arguments info */ + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_void, 0, 0, 0) +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_locale, 0, 0, 0) + ZEND_ARG_INFO(0, "locale") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_setText, 0, 0, 1) + ZEND_ARG_INFO(0, "text") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_next, 0, 0, 0) + ZEND_ARG_INFO(0, "offset") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_offset, 0, 0, 1) + ZEND_ARG_INFO(0, "offset") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_get_locale, 0, 0, 1) + ZEND_ARG_INFO(0, "locale_type") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_getPartsIterator, 0, 0, 0) + ZEND_ARG_INFO(0, "key_type") +ZEND_END_ARG_INFO() + +ZEND_BEGIN_ARG_INFO_EX(ainfo_rbbi___construct, 0, 0, 1) + ZEND_ARG_INFO(0, "rules") + ZEND_ARG_INFO(0, "areCompiled") +ZEND_END_ARG_INFO() + +/* }}} */ + +/* {{{ BreakIterator_class_functions + * Every 'BreakIterator' class method has an entry in this table + */ +static const zend_function_entry BreakIterator_class_functions[] = { + PHP_ME(BreakIterator, __construct, ainfo_biter_void, ZEND_ACC_PRIVATE) + PHP_ME_MAPPING(createWordInstance, breakiter_create_word_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(createLineInstance, breakiter_create_line_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(createCharacterInstance, breakiter_create_character_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(createSentenceInstance, breakiter_create_sentence_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(createTitleInstance, breakiter_create_title_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + 
PHP_ME_MAPPING(createCodePointInstance, breakiter_create_code_point_instance, ainfo_biter_void, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getText, breakiter_get_text, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(setText, breakiter_set_text, ainfo_biter_setText, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(first, breakiter_first, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(last, breakiter_last, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(previous, breakiter_previous, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(next, breakiter_next, ainfo_biter_next, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(current, breakiter_current, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(following, breakiter_following, ainfo_biter_offset, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(preceding, breakiter_preceding, ainfo_biter_offset, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(isBoundary, breakiter_is_boundary, ainfo_biter_offset, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getLocale, breakiter_get_locale, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getPartsIterator, breakiter_get_parts_iterator, ainfo_biter_getPartsIterator, ZEND_ACC_PUBLIC) + + PHP_ME_MAPPING(getErrorCode, breakiter_get_error_code, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getErrorMessage, breakiter_get_error_message, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_FE_END +}; +/* }}} */ + +/* {{{ RuleBasedBreakIterator_class_functions + */ +static const zend_function_entry RuleBasedBreakIterator_class_functions[] = { + PHP_ME(IntlRuleBasedBreakIterator, __construct, ainfo_rbbi___construct, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getRules, rbbi_get_rules, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getRuleStatus, rbbi_get_rule_status, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_ME_MAPPING(getRuleStatusVec, rbbi_get_rule_status_vec, ainfo_biter_void, ZEND_ACC_PUBLIC) +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48 + PHP_ME_MAPPING(getBinaryRules, rbbi_get_binary_rules, ainfo_biter_void, 
ZEND_ACC_PUBLIC) +#endif + PHP_FE_END +}; +/* }}} */ + +/* {{{ CodePointBreakIterator_class_functions + */ +static const zend_function_entry CodePointBreakIterator_class_functions[] = { + PHP_ME_MAPPING(getLastCodePoint, cpbi_get_last_code_point, ainfo_biter_void, ZEND_ACC_PUBLIC) + PHP_FE_END +}; +/* }}} */ + + +/* {{{ breakiterator_register_BreakIterator_class + * Initialize 'BreakIterator' class + */ +U_CFUNC void breakiterator_register_BreakIterator_class(TSRMLS_D) +{ + zend_class_entry ce; + + /* Create and register 'BreakIterator' class. */ + INIT_CLASS_ENTRY(ce, "IntlBreakIterator", BreakIterator_class_functions); + ce.create_object = BreakIterator_object_create; + ce.get_iterator = _breakiterator_get_iterator; + BreakIterator_ce_ptr = zend_register_internal_class(&ce TSRMLS_CC); + + memcpy(&BreakIterator_handlers, zend_get_std_object_handlers(), + sizeof BreakIterator_handlers); + BreakIterator_handlers.compare_objects = BreakIterator_compare_objects; + BreakIterator_handlers.clone_obj = BreakIterator_clone_obj; + BreakIterator_handlers.get_debug_info = BreakIterator_get_debug_info; + + zend_class_implements(BreakIterator_ce_ptr TSRMLS_CC, 1, + zend_ce_traversable); + + zend_declare_class_constant_long(BreakIterator_ce_ptr, + "DONE", sizeof("DONE") - 1, BreakIterator::DONE TSRMLS_CC ); + + /* Declare constants that are defined in the C header */ +#define BREAKITER_DECL_LONG_CONST(name) \ + zend_declare_class_constant_long(BreakIterator_ce_ptr, #name, \ + sizeof(#name) - 1, UBRK_ ## name TSRMLS_CC) + + BREAKITER_DECL_LONG_CONST(WORD_NONE); + BREAKITER_DECL_LONG_CONST(WORD_NONE_LIMIT); + BREAKITER_DECL_LONG_CONST(WORD_NUMBER); + BREAKITER_DECL_LONG_CONST(WORD_NUMBER_LIMIT); + BREAKITER_DECL_LONG_CONST(WORD_LETTER); + BREAKITER_DECL_LONG_CONST(WORD_LETTER_LIMIT); + BREAKITER_DECL_LONG_CONST(WORD_KANA); + BREAKITER_DECL_LONG_CONST(WORD_KANA_LIMIT); + BREAKITER_DECL_LONG_CONST(WORD_IDEO); + BREAKITER_DECL_LONG_CONST(WORD_IDEO_LIMIT); + + 
BREAKITER_DECL_LONG_CONST(LINE_SOFT); + BREAKITER_DECL_LONG_CONST(LINE_SOFT_LIMIT); + BREAKITER_DECL_LONG_CONST(LINE_HARD); + BREAKITER_DECL_LONG_CONST(LINE_HARD_LIMIT); + + BREAKITER_DECL_LONG_CONST(SENTENCE_TERM); + BREAKITER_DECL_LONG_CONST(SENTENCE_TERM_LIMIT); + BREAKITER_DECL_LONG_CONST(SENTENCE_SEP); + BREAKITER_DECL_LONG_CONST(SENTENCE_SEP_LIMIT); + +#undef BREAKITER_DECL_LONG_CONST + + + /* Create and register 'RuleBasedBreakIterator' class. */ + INIT_CLASS_ENTRY(ce, "IntlRuleBasedBreakIterator", + RuleBasedBreakIterator_class_functions); + RuleBasedBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce, + BreakIterator_ce_ptr, NULL TSRMLS_CC); + + /* Create and register 'CodePointBreakIterator' class. */ + INIT_CLASS_ENTRY(ce, "IntlCodePointBreakIterator", + CodePointBreakIterator_class_functions); + CodePointBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce, + BreakIterator_ce_ptr, NULL TSRMLS_CC); +} +/* }}} */ diff --git a/ext/intl/breakiterator/breakiterator_class.h b/ext/intl/breakiterator/breakiterator_class.h new file mode 100644 index 0000000000..cc5d51256f --- /dev/null +++ b/ext/intl/breakiterator/breakiterator_class.h @@ -0,0 +1,71 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifndef BREAKITERATOR_CLASS_H +#define BREAKITERATOR_CLASS_H + +//redefinition of inline in PHP headers causes problems, so include this before +#include <math.h> + +#include <php.h> +#include "../intl_error.h" +#include "../intl_data.h" + +#ifndef USE_BREAKITERATOR_POINTER +typedef void BreakIterator; +#endif + +typedef struct { + zend_object zo; + + // error handling + intl_error err; + + // ICU break iterator + BreakIterator* biter; + + // current text + zval *text; +} BreakIterator_object; + +#define BREAKITER_ERROR(bio) (bio)->err +#define BREAKITER_ERROR_P(bio) &(BREAKITER_ERROR(bio)) + +#define BREAKITER_ERROR_CODE(bio) INTL_ERROR_CODE(BREAKITER_ERROR(bio)) +#define BREAKITER_ERROR_CODE_P(bio) &(INTL_ERROR_CODE(BREAKITER_ERROR(bio))) + +#define BREAKITER_METHOD_INIT_VARS INTL_METHOD_INIT_VARS(BreakIterator, bio) +#define BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK INTL_METHOD_FETCH_OBJECT(BreakIterator, bio) +#define BREAKITER_METHOD_FETCH_OBJECT \ + BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; \ + if (bio->biter == NULL) \ + { \ + intl_errors_set(&bio->err, U_ILLEGAL_ARGUMENT_ERROR, "Found unconstructed BreakIterator", 0 TSRMLS_CC); \ + RETURN_FALSE; \ + } + +void breakiterator_object_create(zval *object, BreakIterator *break_iter TSRMLS_DC); + +void breakiterator_object_construct(zval *object, BreakIterator *break_iter TSRMLS_DC); + +void breakiterator_register_BreakIterator_class(TSRMLS_D); + +extern zend_class_entry *BreakIterator_ce_ptr, + *RuleBasedBreakIterator_ce_ptr; + +extern zend_object_handlers BreakIterator_handlers; + +#endif /* #ifndef BREAKITERATOR_CLASS_H */ diff --git a/ext/intl/breakiterator/breakiterator_iterators.cpp b/ext/intl/breakiterator/breakiterator_iterators.cpp new file mode 100644 index 0000000000..d88ad8a712 --- /dev/null +++ 
b/ext/intl/breakiterator/breakiterator_iterators.cpp @@ -0,0 +1,346 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <unicode/brkiter.h> + +#include "breakiterator_iterators.h" +#include "../common/common_enum.h" + +extern "C" { +#define USE_BREAKITERATOR_POINTER +#include "breakiterator_class.h" +#include "../intl_convert.h" +#include "../locale/locale.h" +#include <zend_exceptions.h> +} + +static zend_class_entry *IntlPartsIterator_ce_ptr; +static zend_object_handlers IntlPartsIterator_handlers; + +/* BreakIterator's iterator */ + +inline BreakIterator *_breakiter_prolog(zend_object_iterator *iter TSRMLS_DC) +{ + BreakIterator_object *bio; + bio = (BreakIterator_object*)zend_object_store_get_object( + (const zval*)iter->data TSRMLS_CC); + intl_errors_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC); + if (bio->biter == NULL) { + intl_errors_set(BREAKITER_ERROR_P(bio), U_INVALID_STATE_ERROR, + "The BreakIterator object backing the PHP iterator is not " + "properly constructed", 0 TSRMLS_CC); + } + return bio->biter; +} + +static void _breakiterator_destroy_it(zend_object_iterator *iter TSRMLS_DC) +{ + zval_ptr_dtor((zval**)&iter->data); +} + 
+static void _breakiterator_move_forward(zend_object_iterator *iter TSRMLS_DC) +{ + BreakIterator *biter = _breakiter_prolog(iter TSRMLS_CC); + zoi_with_current *zoi_iter = (zoi_with_current*)iter; + + iter->funcs->invalidate_current(iter TSRMLS_CC); + + if (biter == NULL) { + return; + } + + int32_t pos = biter->next(); + if (pos != BreakIterator::DONE) { + MAKE_STD_ZVAL(zoi_iter->current); + ZVAL_LONG(zoi_iter->current, (long)pos); + } //else we've reached the end of the enum, nothing more is required +} + +static void _breakiterator_rewind(zend_object_iterator *iter TSRMLS_DC) +{ + BreakIterator *biter = _breakiter_prolog(iter TSRMLS_CC); + zoi_with_current *zoi_iter = (zoi_with_current*)iter; + + int32_t pos = biter->first(); + MAKE_STD_ZVAL(zoi_iter->current); + ZVAL_LONG(zoi_iter->current, (long)pos); +} + +static zend_object_iterator_funcs breakiterator_iterator_funcs = { + zoi_with_current_dtor, + zoi_with_current_valid, + zoi_with_current_get_current_data, + NULL, + _breakiterator_move_forward, + _breakiterator_rewind, + zoi_with_current_invalidate_current +}; + +U_CFUNC zend_object_iterator *_breakiterator_get_iterator( + zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC) +{ + BreakIterator_object *bio; + if (by_ref) { + zend_throw_exception(NULL, + "Iteration by reference is not supported", 0 TSRMLS_CC); + return NULL; + } + + bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + BreakIterator *biter = bio->biter; + + if (biter == NULL) { + zend_throw_exception(NULL, + "The BreakIterator is not properly constructed", 0 TSRMLS_CC); + return NULL; + } + + zoi_with_current *zoi_iter = + static_cast<zoi_with_current*>(emalloc(sizeof *zoi_iter)); + zoi_iter->zoi.data = static_cast<void*>(object); + zoi_iter->zoi.funcs = &breakiterator_iterator_funcs; + zoi_iter->zoi.index = 0; + zoi_iter->destroy_it = _breakiterator_destroy_it; + zoi_iter->wrapping_obj = NULL; /* not used; object is in zoi.data */ + zoi_iter->current = NULL; + 
+ zval_add_ref(&object); + + return reinterpret_cast<zend_object_iterator *>(zoi_iter); +} + +/* BreakIterator parts iterator */ + +typedef struct zoi_break_iter_parts { + zoi_with_current zoi_cur; + parts_iter_key_type key_type; + BreakIterator_object *bio; /* so we don't have to fetch it all the time */ +} zoi_break_iter_parts; + +static void _breakiterator_parts_destroy_it(zend_object_iterator *iter TSRMLS_DC) +{ + zval_ptr_dtor(reinterpret_cast<zval**>(&iter->data)); +} + +static int _breakiterator_parts_get_current_key(zend_object_iterator *iter, + char **str_key, + uint *str_key_len, + ulong *int_key TSRMLS_DC) +{ + /* the actual work is done in move_forward and rewind */ + *int_key = iter->index; + return HASH_KEY_IS_LONG; +} + +static void _breakiterator_parts_move_forward(zend_object_iterator *iter TSRMLS_DC) +{ + zoi_break_iter_parts *zoi_bit = (zoi_break_iter_parts*)iter; + BreakIterator_object *bio = zoi_bit->bio; + + iter->funcs->invalidate_current(iter TSRMLS_CC); + + int32_t cur, + next; + + cur = bio->biter->current(); + if (cur == BreakIterator::DONE) { + return; + } + next = bio->biter->next(); + if (next == BreakIterator::DONE) { + return; + } + + if (zoi_bit->key_type == PARTS_ITERATOR_KEY_LEFT) { + iter->index = cur; + } else if (zoi_bit->key_type == PARTS_ITERATOR_KEY_RIGHT) { + iter->index = next; + } + /* else zoi_bit->key_type == PARTS_ITERATOR_KEY_SEQUENTIAL + * No need to do anything, the engine increments ->index */ + + const char *s = Z_STRVAL_P(bio->text); + int32_t slen = Z_STRLEN_P(bio->text), + len; + char *res; + + if (next == BreakIterator::DONE) { + next = slen; + } + assert(next <= slen && next >= cur); + len = next - cur; + res = static_cast<char*>(emalloc(len + 1)); + + memcpy(res, &s[cur], len); + res[len] = '\0'; + + MAKE_STD_ZVAL(zoi_bit->zoi_cur.current); + ZVAL_STRINGL(zoi_bit->zoi_cur.current, res, len, 0); +} + +static void _breakiterator_parts_rewind(zend_object_iterator *iter TSRMLS_DC) +{ + zoi_break_iter_parts 
*zoi_bit = (zoi_break_iter_parts*)iter; + BreakIterator_object *bio = zoi_bit->bio; + + if (zoi_bit->zoi_cur.current) { + iter->funcs->invalidate_current(iter TSRMLS_CC); + } + + bio->biter->first(); + + iter->funcs->move_forward(iter TSRMLS_CC); +} + +static zend_object_iterator_funcs breakiterator_parts_it_funcs = { + zoi_with_current_dtor, + zoi_with_current_valid, + zoi_with_current_get_current_data, + _breakiterator_parts_get_current_key, + _breakiterator_parts_move_forward, + _breakiterator_parts_rewind, + zoi_with_current_invalidate_current +}; + +void IntlIterator_from_BreakIterator_parts(zval *break_iter_zv, + zval *object, + parts_iter_key_type key_type TSRMLS_DC) +{ + IntlIterator_object *ii; + + zval_add_ref(&break_iter_zv); + + object_init_ex(object, IntlPartsIterator_ce_ptr); + ii = (IntlIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + + ii->iterator = (zend_object_iterator*)emalloc(sizeof(zoi_break_iter_parts)); + ii->iterator->data = break_iter_zv; + ii->iterator->funcs = &breakiterator_parts_it_funcs; + ii->iterator->index = 0; + ((zoi_with_current*)ii->iterator)->destroy_it = _breakiterator_parts_destroy_it; + ((zoi_with_current*)ii->iterator)->wrapping_obj = object; + ((zoi_with_current*)ii->iterator)->current = NULL; + + ((zoi_break_iter_parts*)ii->iterator)->bio = (BreakIterator_object*) + zend_object_store_get_object(break_iter_zv TSRMLS_CC); + assert(((zoi_break_iter_parts*)ii->iterator)->bio->biter != NULL); + ((zoi_break_iter_parts*)ii->iterator)->key_type = key_type; +} + +U_CFUNC zend_object_value IntlPartsIterator_object_create(zend_class_entry *ce TSRMLS_DC) +{ + zend_object_value retval; + + retval = IntlIterator_ce_ptr->create_object(ce TSRMLS_CC); + retval.handlers = &IntlPartsIterator_handlers; + + return retval; +} + +U_CFUNC zend_function *IntlPartsIterator_get_method(zval **object_ptr, + char *method, int method_len, const zend_literal *key TSRMLS_DC) +{ + zend_literal local_literal = {0}; + zend_function *ret; 
+ ALLOCA_FLAG(use_heap) + + if (key == NULL) { + Z_STRVAL(local_literal.constant) = static_cast<char*>( + do_alloca(method_len + 1, use_heap)); + zend_str_tolower_copy(Z_STRVAL(local_literal.constant), + method, method_len); + local_literal.hash_value = zend_hash_func( + Z_STRVAL(local_literal.constant), method_len + 1); + key = &local_literal; + } + + if ((key->hash_value & 0xFFFFFFFF) == 0xA2B486A1 /* hash of getrulestatus\0 */ + && method_len == sizeof("getrulestatus") - 1 + && memcmp("getrulestatus", Z_STRVAL(key->constant), method_len) == 0) { + IntlIterator_object *obj = (IntlIterator_object*) + zend_object_store_get_object(*object_ptr TSRMLS_CC); + if (obj->iterator && obj->iterator->data) { + zval *break_iter_zv = static_cast<zval*>(obj->iterator->data); + *object_ptr = break_iter_zv; + ret = Z_OBJ_HANDLER_P(break_iter_zv, get_method)(object_ptr, + method, method_len, key TSRMLS_CC); + goto end; + } + } + + ret = std_object_handlers.get_method(object_ptr, + method, method_len, key TSRMLS_CC); + +end: + if (key == &local_literal) { + free_alloca(Z_STRVAL(local_literal.constant), use_heap); + } + + return ret; +} + +U_CFUNC PHP_METHOD(IntlPartsIterator, getBreakIterator) +{ + INTLITERATOR_METHOD_INIT_VARS; + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "IntlPartsIterator::getBreakIterator: bad arguments", 0 TSRMLS_CC); + return; + } + + INTLITERATOR_METHOD_FETCH_OBJECT; + + zval *biter_zval = static_cast<zval*>(ii->iterator->data); + RETURN_ZVAL(biter_zval, 1, 0); +} + +ZEND_BEGIN_ARG_INFO_EX(ainfo_parts_it_void, 0, 0, 0) +ZEND_END_ARG_INFO() + +static const zend_function_entry IntlPartsIterator_class_functions[] = { + PHP_ME(IntlPartsIterator, getBreakIterator, ainfo_parts_it_void, ZEND_ACC_PUBLIC) + PHP_FE_END +}; + +U_CFUNC void breakiterator_register_IntlPartsIterator_class(TSRMLS_D) +{ + zend_class_entry ce; + + /* Create and register 'BreakIterator' class. 
*/ + INIT_CLASS_ENTRY(ce, "IntlPartsIterator", IntlPartsIterator_class_functions); + IntlPartsIterator_ce_ptr = zend_register_internal_class_ex(&ce, + IntlIterator_ce_ptr, NULL TSRMLS_CC); + IntlPartsIterator_ce_ptr->create_object = IntlPartsIterator_object_create; + + memcpy(&IntlPartsIterator_handlers, &IntlIterator_handlers, + sizeof IntlPartsIterator_handlers); + IntlPartsIterator_handlers.get_method = IntlPartsIterator_get_method; + +#define PARTSITER_DECL_LONG_CONST(name) \ + zend_declare_class_constant_long(IntlPartsIterator_ce_ptr, #name, \ + sizeof(#name) - 1, PARTS_ITERATOR_ ## name TSRMLS_CC) + + PARTSITER_DECL_LONG_CONST(KEY_SEQUENTIAL); + PARTSITER_DECL_LONG_CONST(KEY_LEFT); + PARTSITER_DECL_LONG_CONST(KEY_RIGHT); + +#undef PARTSITER_DECL_LONG_CONST +}
\ No newline at end of file diff --git a/ext/intl/breakiterator/breakiterator_iterators.h b/ext/intl/breakiterator/breakiterator_iterators.h new file mode 100644 index 0000000000..7162072414 --- /dev/null +++ b/ext/intl/breakiterator/breakiterator_iterators.h @@ -0,0 +1,42 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ +*/ +#ifndef INTL_BREAKITERATOR_ITERATORS_H +#define INTL_BREAKITERATOR_ITERATORS_H + +#include <unicode/umachine.h> + +U_CDECL_BEGIN +#include <math.h> +#include <php.h> +U_CDECL_END + +/* Key mode of the parts iterator: with KEY_LEFT the key is the offset of the part's left boundary, with KEY_RIGHT the offset of its right boundary; with KEY_SEQUENTIAL the iterator does not set the key itself. */typedef enum { + PARTS_ITERATOR_KEY_SEQUENTIAL, + PARTS_ITERATOR_KEY_LEFT, + PARTS_ITERATOR_KEY_RIGHT, +} parts_iter_key_type; + +#ifdef __cplusplus +/* Initializes `object` as an IntlPartsIterator over the break iterator in `break_iter_zv`; declared for C++ translation units only. */void IntlIterator_from_BreakIterator_parts(zval *break_iter_zv, + zval *object, + parts_iter_key_type key_type TSRMLS_DC); +#endif + +/* get_iterator handler installed on the IntlBreakIterator class entry; returns NULL after throwing when by_ref is set or the object is unconstructed. */U_CFUNC zend_object_iterator *_breakiterator_get_iterator( + zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC); +/* Registers the IntlPartsIterator class and its KEY_* constants. */U_CFUNC void breakiterator_register_IntlPartsIterator_class(TSRMLS_D); + +#endif
\ No newline at end of file diff --git a/ext/intl/breakiterator/breakiterator_methods.cpp b/ext/intl/breakiterator/breakiterator_methods.cpp new file mode 100644 index 0000000000..7b502528f3 --- /dev/null +++ b/ext/intl/breakiterator/breakiterator_methods.cpp @@ -0,0 +1,451 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <unicode/brkiter.h> +#include "codepointiterator_internal.h" + +#include "breakiterator_iterators.h" + +extern "C" { +#define USE_BREAKITERATOR_POINTER 1 +#include "breakiterator_class.h" +#include "../locale/locale.h" +#include <zend_exceptions.h> +} + +using PHP::CodePointBreakIterator; + +U_CFUNC PHP_METHOD(BreakIterator, __construct) +{ + zend_throw_exception( NULL, + "An object of this type cannot be created with the new operator", + 0 TSRMLS_CC ); +} + +static void _breakiter_factory(const char *func_name, + BreakIterator *(*func)(const Locale&, UErrorCode&), + INTERNAL_FUNCTION_PARAMETERS) +{ + BreakIterator *biter; + const char *locale_str = NULL; + int dummy; + char *msg; + UErrorCode status = UErrorCode(); + intl_error_reset(NULL TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!", + 
&locale_str, &dummy) == FAILURE) { + spprintf(&msg, NULL, "%s: bad arguments", func_name); + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_NULL(); + } + + if (locale_str == NULL) { + locale_str = intl_locale_get_default(TSRMLS_C); + } + + biter = func(Locale::createFromName(locale_str), status); + intl_error_set_code(NULL, status TSRMLS_CC); + if (U_FAILURE(status)) { + spprintf(&msg, NULL, "%s: error creating BreakIterator", + func_name); + intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_NULL(); + } + + breakiterator_object_create(return_value, biter TSRMLS_CC); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_word_instance) +{ + _breakiter_factory("breakiter_create_word_instance", + &BreakIterator::createWordInstance, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_line_instance) +{ + _breakiter_factory("breakiter_create_line_instance", + &BreakIterator::createLineInstance, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_character_instance) +{ + _breakiter_factory("breakiter_create_character_instance", + &BreakIterator::createCharacterInstance, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_sentence_instance) +{ + _breakiter_factory("breakiter_create_sentence_instance", + &BreakIterator::createSentenceInstance, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_title_instance) +{ + _breakiter_factory("breakiter_create_title_instance", + &BreakIterator::createTitleInstance, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_create_code_point_instance) +{ + UErrorCode status = UErrorCode(); + intl_error_reset(NULL TSRMLS_CC); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_create_code_point_instance: bad arguments", 0 TSRMLS_CC); + RETURN_NULL(); + } + + CodePointBreakIterator *cpbi = new 
CodePointBreakIterator(); + breakiterator_object_create(return_value, cpbi TSRMLS_CC); +} + +U_CFUNC PHP_FUNCTION(breakiter_get_text) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_text: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + if (bio->text == NULL) { + RETURN_NULL(); + } else { + RETURN_ZVAL(bio->text, 1, 0); + } +} + +U_CFUNC PHP_FUNCTION(breakiter_set_text) +{ + char *text; + int text_len; + UText *ut = NULL; + zval **textzv; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", + &text, &text_len) == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_set_text: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + int res = zend_get_parameters_ex(1, &textzv); + assert(res == SUCCESS); + + BREAKITER_METHOD_FETCH_OBJECT; + + /* assert it's safe to use text and text_len because zpp changes the + * arguments in the stack */ + assert(text == Z_STRVAL_PP(textzv)); + + ut = utext_openUTF8(ut, text, text_len, BREAKITER_ERROR_CODE_P(bio)); + INTL_CTOR_CHECK_STATUS(bio, "breakiter_set_text: error opening UText"); + + bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio)); + utext_close(ut); /* ICU shallow clones the UText */ + INTL_CTOR_CHECK_STATUS(bio, "breakiter_set_text: error calling " + "BreakIterator::setText()"); + + /* When ICU clones the UText, it does not copy the buffer, so we have to + * keep the string buffer around by holding a reference to its zval. 
This + * also allows a faste implementation of getText() */ + if (bio->text != NULL) { + zval_ptr_dtor(&bio->text); + } + bio->text = *textzv; + zval_add_ref(&bio->text); + + RETURN_TRUE; +} + +static void _breakiter_no_args_ret_int32( + const char *func_name, + int32_t (BreakIterator::*func)(), + INTERNAL_FUNCTION_PARAMETERS) +{ + char *msg; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + spprintf(&msg, NULL, "%s: bad arguments", func_name); + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + int32_t res = (bio->biter->*func)(); + + RETURN_LONG((long)res); +} + +static void _breakiter_int32_ret_int32( + const char *func_name, + int32_t (BreakIterator::*func)(int32_t), + INTERNAL_FUNCTION_PARAMETERS) +{ + char *msg; + long arg; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &arg) == FAILURE) { + spprintf(&msg, NULL, "%s: bad arguments", func_name); + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + if (arg < INT32_MIN || arg > INT32_MAX) { + spprintf(&msg, NULL, "%s: offset argument is outside bounds of " + "a 32-bit wide integer", func_name); + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_FALSE; + } + + int32_t res = (bio->biter->*func)((int32_t)arg); + + RETURN_LONG((long)res); +} + +U_CFUNC PHP_FUNCTION(breakiter_first) +{ + _breakiter_no_args_ret_int32("breakiter_first", + &BreakIterator::first, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_last) +{ + _breakiter_no_args_ret_int32("breakiter_last", + &BreakIterator::last, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_previous) +{ + _breakiter_no_args_ret_int32("breakiter_previous", + &BreakIterator::previous, + 
INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_next) +{ + bool no_arg_version = false; + + if (ZEND_NUM_ARGS() == 0) { + no_arg_version = true; + } else if (ZEND_NUM_ARGS() == 1) { + zval **arg; + int res = zend_get_parameters_ex(1, &arg); + assert(res == SUCCESS); + if (Z_TYPE_PP(arg) == IS_NULL) { + no_arg_version = true; + ht = 0; /* pretend we don't have any argument */ + } else { + no_arg_version = false; + } + } + + if (no_arg_version) { + _breakiter_no_args_ret_int32("breakiter_next", + &BreakIterator::next, + INTERNAL_FUNCTION_PARAM_PASSTHRU); + } else { + _breakiter_int32_ret_int32("breakiter_next", + &BreakIterator::next, + INTERNAL_FUNCTION_PARAM_PASSTHRU); + } +} + +U_CFUNC PHP_FUNCTION(breakiter_current) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_current: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + int32_t res = bio->biter->current(); + + RETURN_LONG((long)res); +} + +U_CFUNC PHP_FUNCTION(breakiter_following) +{ + _breakiter_int32_ret_int32("breakiter_following", + &BreakIterator::following, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_preceding) +{ + _breakiter_int32_ret_int32("breakiter_preceding", + &BreakIterator::preceding, + INTERNAL_FUNCTION_PARAM_PASSTHRU); +} + +U_CFUNC PHP_FUNCTION(breakiter_is_boundary) +{ + long offset; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", + &offset) == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_is_boundary: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + if (offset < INT32_MIN || offset > INT32_MAX) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_is_boundary: offset argument is outside bounds of " + "a 32-bit wide integer", 0 TSRMLS_CC); + RETURN_FALSE; + } + + 
BREAKITER_METHOD_FETCH_OBJECT; + + UBool res = bio->biter->isBoundary((int32_t)offset); + + RETURN_BOOL((long)res); +} + +U_CFUNC PHP_FUNCTION(breakiter_get_locale) +{ + long locale_type; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &locale_type) == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_locale: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + if (locale_type != ULOC_ACTUAL_LOCALE && locale_type != ULOC_VALID_LOCALE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_locale: invalid locale type", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + Locale locale = bio->biter->getLocale((ULocDataLocaleType)locale_type, + BREAKITER_ERROR_CODE(bio)); + INTL_METHOD_CHECK_STATUS(bio, + "breakiter_get_locale: Call to ICU method has failed"); + + RETURN_STRING(locale.getName(), 1); +} + +U_CFUNC PHP_FUNCTION(breakiter_get_parts_iterator) +{ + long key_type = 0; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|l", &key_type) == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_parts_iterator: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + if (key_type != PARTS_ITERATOR_KEY_SEQUENTIAL + && key_type != PARTS_ITERATOR_KEY_LEFT + && key_type != PARTS_ITERATOR_KEY_RIGHT) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_parts_iterator: bad key type", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + IntlIterator_from_BreakIterator_parts( + object, return_value, (parts_iter_key_type)key_type TSRMLS_CC); +} + +U_CFUNC PHP_FUNCTION(breakiter_get_error_code) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_error_code: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + /* Fetch 
the object (without resetting its last error code ). */ + bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + if (bio == NULL) + RETURN_FALSE; + + RETURN_LONG((long)BREAKITER_ERROR_CODE(bio)); +} + +U_CFUNC PHP_FUNCTION(breakiter_get_error_message) +{ + const char* message = NULL; + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, + "breakiter_get_error_message: bad arguments", 0 TSRMLS_CC ); + RETURN_FALSE; + } + + + /* Fetch the object (without resetting its last error code ). */ + bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC); + if (bio == NULL) + RETURN_FALSE; + + /* Return last error message. */ + message = intl_error_get_message(BREAKITER_ERROR_P(bio) TSRMLS_CC); + RETURN_STRING(message, 0); +} diff --git a/ext/intl/breakiterator/breakiterator_methods.h b/ext/intl/breakiterator/breakiterator_methods.h new file mode 100644 index 0000000000..a479ac92e8 --- /dev/null +++ b/ext/intl/breakiterator/breakiterator_methods.h @@ -0,0 +1,64 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifndef BREAKITERATOR_METHODS_H +#define BREAKITERATOR_METHODS_H + +#include <php.h> + +PHP_METHOD(BreakIterator, __construct); + +PHP_FUNCTION(breakiter_create_word_instance); + +PHP_FUNCTION(breakiter_create_line_instance); + +PHP_FUNCTION(breakiter_create_character_instance); + +PHP_FUNCTION(breakiter_create_sentence_instance); + +PHP_FUNCTION(breakiter_create_title_instance); + +PHP_FUNCTION(breakiter_create_code_point_instance); + +PHP_FUNCTION(breakiter_get_text); + +PHP_FUNCTION(breakiter_set_text); + +PHP_FUNCTION(breakiter_first); + +PHP_FUNCTION(breakiter_last); + +PHP_FUNCTION(breakiter_previous); + +PHP_FUNCTION(breakiter_next); + +PHP_FUNCTION(breakiter_current); + +PHP_FUNCTION(breakiter_following); + +PHP_FUNCTION(breakiter_preceding); + +PHP_FUNCTION(breakiter_is_boundary); + +PHP_FUNCTION(breakiter_get_locale); + +PHP_FUNCTION(breakiter_get_parts_iterator); + +PHP_FUNCTION(breakiter_get_error_code); + +PHP_FUNCTION(breakiter_get_error_message); + +#endif
\ No newline at end of file diff --git a/ext/intl/breakiterator/codepointiterator_internal.cpp b/ext/intl/breakiterator/codepointiterator_internal.cpp new file mode 100644 index 0000000000..bf9239d531 --- /dev/null +++ b/ext/intl/breakiterator/codepointiterator_internal.cpp @@ -0,0 +1,291 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#include "codepointiterator_internal.h" +#include <unicode/uchriter.h> +#include <typeinfo> + +//copied from cmemory.h, which is not public +typedef union { + long t1; + double t2; + void *t3; +} UAlignedMemory; + +#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask)) +#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1) +#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr)) + +using namespace PHP; + +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator); + +CodePointBreakIterator::CodePointBreakIterator() +: BreakIterator(), fCharIter(NULL), lastCodePoint(U_SENTINEL) +{ + UErrorCode uec = UErrorCode(); + this->fText = utext_openUChars(NULL, NULL, 0, &uec); +} + +CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other) +: BreakIterator(other), 
fText(NULL), fCharIter(NULL), lastCodePoint(U_SENTINEL) +{ + *this = other; +} + +CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that) +{ + UErrorCode uec = UErrorCode(); + UText *ut_clone = NULL; + + if (this == &that) { + return *this; + } + + this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec); + + //don't bother copying the character iterator, getText() is deprecated + clearCurrentCharIter(); + + this->lastCodePoint = that.lastCodePoint; + return *this; +} + +CodePointBreakIterator::~CodePointBreakIterator() +{ + if (this->fText) { + utext_close(this->fText); + } + clearCurrentCharIter(); +} + +UBool CodePointBreakIterator::operator==(const BreakIterator& that) const +{ + if (typeid(*this) != typeid(that)) { + return FALSE; + } + + const CodePointBreakIterator& that2 = + static_cast<const CodePointBreakIterator&>(that); + + if (!utext_equals(this->fText, that2.fText)) { + return FALSE; + } + + return TRUE; +} + +CodePointBreakIterator* CodePointBreakIterator::clone(void) const +{ + return new CodePointBreakIterator(*this); +} + +CharacterIterator& CodePointBreakIterator::getText(void) const +{ + if (this->fCharIter == NULL) { + //this method is deprecated anyway; setup bogus iterator + static const UChar c = 0; + this->fCharIter = new UCharCharacterIterator(&c, 0); + } + + return *this->fCharIter; +} + +UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const +{ + return utext_clone(fillIn, this->fText, FALSE, TRUE, &status); +} + +void CodePointBreakIterator::setText(const UnicodeString &text) +{ + UErrorCode uec = UErrorCode(); + + //this closes the previous utext, if any + this->fText = utext_openConstUnicodeString(this->fText, &text, &uec); + + clearCurrentCharIter(); +} + +void CodePointBreakIterator::setText(UText *text, UErrorCode &status) +{ + if (U_FAILURE(status)) { + return; + } + + this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status); + + 
clearCurrentCharIter(); +} + +void CodePointBreakIterator::adoptText(CharacterIterator* it) +{ + UErrorCode uec = UErrorCode(); + clearCurrentCharIter(); + + this->fCharIter = it; + this->fText = utext_openCharacterIterator(this->fText, it, &uec); +} + +int32_t CodePointBreakIterator::first(void) +{ + UTEXT_SETNATIVEINDEX(this->fText, 0); + this->lastCodePoint = U_SENTINEL; + + return 0; +} + +int32_t CodePointBreakIterator::last(void) +{ + int32_t pos = (int32_t)utext_nativeLength(this->fText); + UTEXT_SETNATIVEINDEX(this->fText, pos); + this->lastCodePoint = U_SENTINEL; + + return pos; +} + +int32_t CodePointBreakIterator::previous(void) +{ + this->lastCodePoint = UTEXT_PREVIOUS32(this->fText); + if (this->lastCodePoint == U_SENTINEL) { + return BreakIterator::DONE; + } + + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); +} + +int32_t CodePointBreakIterator::next(void) +{ + this->lastCodePoint = UTEXT_NEXT32(this->fText); + if (this->lastCodePoint == U_SENTINEL) { + return BreakIterator::DONE; + } + + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); +} + +int32_t CodePointBreakIterator::current(void) const +{ + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); +} + +int32_t CodePointBreakIterator::following(int32_t offset) +{ + this->lastCodePoint = utext_next32From(this->fText, offset); + if (this->lastCodePoint == U_SENTINEL) { + return BreakIterator::DONE; + } + + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); +} + +int32_t CodePointBreakIterator::preceding(int32_t offset) +{ + this->lastCodePoint = utext_previous32From(this->fText, offset); + if (this->lastCodePoint == U_SENTINEL) { + return BreakIterator::DONE; + } + + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); +} + +UBool CodePointBreakIterator::isBoundary(int32_t offset) +{ + //this function has side effects, and it's supposed to + utext_setNativeIndex(this->fText, offset); + return (offset == utext_getNativeIndex(this->fText)); +} + +int32_t CodePointBreakIterator::next(int32_t n) +{ + 
UBool res = utext_moveIndex32(this->fText, n); + +#ifndef UTEXT_CURRENT32 +#define UTEXT_CURRENT32 utext_current32 +#endif + + if (res) { + this->lastCodePoint = UTEXT_CURRENT32(this->fText); + return (int32_t)UTEXT_GETNATIVEINDEX(this->fText); + } else { + this->lastCodePoint = U_SENTINEL; + return BreakIterator::DONE; + } +} + +CodePointBreakIterator *CodePointBreakIterator::createBufferClone( + void *stackBuffer, int32_t &bufferSize, UErrorCode &status) +{ + //see implementation of RuleBasedBreakIterator::createBufferClone() + if (U_FAILURE(status)) { + return NULL; + } + + if (bufferSize <= 0) { + bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0); + return NULL; + } + + char *buf = (char*)stackBuffer; + uint32_t s = bufferSize; + + if (stackBuffer == NULL) { + s = 0; + } + + if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) { + uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf); + s -= offsetUp; + buf += offsetUp; + } + + if (s < sizeof(CodePointBreakIterator)) { + CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this); + if (clonedBI == NULL) { + status = U_MEMORY_ALLOCATION_ERROR; + } else { + status = U_SAFECLONE_ALLOCATED_WARNING; + } + + return clonedBI; + } + + return new(buf) CodePointBreakIterator(*this); +} + +CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status) +{ + //see implementation of RuleBasedBreakIterator::createBufferClone() + if (U_FAILURE(status)) { + return *this; + } + if (input == NULL) { + status = U_ILLEGAL_ARGUMENT_ERROR; + return *this; + } + + int64_t pos = utext_getNativeIndex(this->fText); + this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status); + if (U_FAILURE(status)) { + return *this; + } + + utext_setNativeIndex(this->fText, pos); + if (utext_getNativeIndex(fText) != pos) { + status = U_ILLEGAL_ARGUMENT_ERROR; + } + + return *this; +} diff --git a/ext/intl/breakiterator/codepointiterator_internal.h 
b/ext/intl/breakiterator/codepointiterator_internal.h new file mode 100644 index 0000000000..988b91c200 --- /dev/null +++ b/ext/intl/breakiterator/codepointiterator_internal.h @@ -0,0 +1,98 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifndef CODEPOINTITERATOR_INTERNAL_H +#define CODEPOINTITERATOR_INTERNAL_H + +#include <unicode/brkiter.h> + +using U_ICU_NAMESPACE::BreakIterator; + +namespace PHP { + + class CodePointBreakIterator : public BreakIterator { + + public: + static UClassID getStaticClassID(); + + CodePointBreakIterator(); + + CodePointBreakIterator(const CodePointBreakIterator &other); + + CodePointBreakIterator& operator=(const CodePointBreakIterator& that); + + virtual ~CodePointBreakIterator(); + + virtual UBool operator==(const BreakIterator& that) const; + + virtual CodePointBreakIterator* clone(void) const; + + virtual UClassID getDynamicClassID(void) const; + + virtual CharacterIterator& getText(void) const; + + virtual UText *getUText(UText *fillIn, UErrorCode &status) const; + + virtual void setText(const UnicodeString &text); + + virtual void setText(UText *text, UErrorCode &status); + + virtual void adoptText(CharacterIterator* it); + + virtual int32_t first(void); + + 
virtual int32_t last(void); + + virtual int32_t previous(void); + + virtual int32_t next(void); + + virtual int32_t current(void) const; + + virtual int32_t following(int32_t offset); + + virtual int32_t preceding(int32_t offset); + + virtual UBool isBoundary(int32_t offset); + + virtual int32_t next(int32_t n); + + virtual CodePointBreakIterator *createBufferClone(void *stackBuffer, + int32_t &BufferSize, + UErrorCode &status); + + virtual CodePointBreakIterator &refreshInputText(UText *input, UErrorCode &status); + + inline UChar32 getLastCodePoint() + { + return this->lastCodePoint; + } + + private: + UText *fText; + UChar32 lastCodePoint; + mutable CharacterIterator *fCharIter; + + inline void clearCurrentCharIter() + { + delete this->fCharIter; + this->fCharIter = NULL; + this->lastCodePoint = U_SENTINEL; + } + }; +} + +#endif
\ No newline at end of file diff --git a/ext/intl/breakiterator/codepointiterator_methods.cpp b/ext/intl/breakiterator/codepointiterator_methods.cpp new file mode 100644 index 0000000000..ae7e526ead --- /dev/null +++ b/ext/intl/breakiterator/codepointiterator_methods.cpp @@ -0,0 +1,44 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#include "codepointiterator_internal.h" + +extern "C" { +#define USE_BREAKITERATOR_POINTER 1 +#include "breakiterator_class.h" +} + +using PHP::CodePointBreakIterator; + +static inline CodePointBreakIterator *fetch_cpbi(BreakIterator_object *bio) { + return (CodePointBreakIterator*)bio->biter; +} + +U_CFUNC PHP_FUNCTION(cpbi_get_last_code_point) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "cpbi_get_last_code_point: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + RETURN_LONG(fetch_cpbi(bio)->getLastCodePoint()); +}
/* ext/intl/breakiterator/codepointiterator_methods.h */
/*
   +----------------------------------------------------------------------+
   | PHP Version 5                                                        |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Authors: Gustavo Lopes <cataphract@php.net>                          |
   +----------------------------------------------------------------------+
 */

#ifndef CODEPOINTITERATOR_METHODS_H
#define CODEPOINTITERATOR_METHODS_H

#include <php.h>

/* Prototype of the code point iterator function implemented in
 * codepointiterator_methods.cpp. */
PHP_FUNCTION(cpbi_get_last_code_point);

#endif
\ No newline at end of file diff --git a/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp b/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp new file mode 100644 index 0000000000..f2a39ba022 --- /dev/null +++ b/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp @@ -0,0 +1,219 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. | + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#include <unicode/rbbi.h> + +extern "C" { +#define USE_BREAKITERATOR_POINTER 1 +#include "breakiterator_class.h" +#include <zend_exceptions.h> +#include <limits.h> +} + +#include "../intl_convertcpp.h" + +static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) { + return (RuleBasedBreakIterator*)bio->biter; +} + +static void _php_intlgregcal_constructor_body(INTERNAL_FUNCTION_PARAMETERS) +{ + zval *object = getThis(); + char *rules; + int rules_len; + zend_bool compiled = 0; + UErrorCode status = U_ZERO_ERROR; + intl_error_reset(NULL TSRMLS_CC); + + if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b", + &rules, &rules_len, &compiled) == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_create_instance: bad arguments", 0 TSRMLS_CC); + RETURN_NULL(); + } + + // instantiation of ICU object + RuleBasedBreakIterator 
*rbbi; + + if (!compiled) { + UnicodeString rulesStr; + UParseError parseError = UParseError(); + if (intl_stringFromChar(rulesStr, rules, rules_len, &status) + == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_create_instance: rules were not a valid UTF-8 string", + 0 TSRMLS_CC); + RETURN_NULL(); + } + + rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status); + intl_error_set_code(NULL, status TSRMLS_CC); + if (U_FAILURE(status)) { + char *msg; + smart_str parse_error_str; + parse_error_str = intl_parse_error_to_string(&parseError); + spprintf(&msg, 0, "rbbi_create_instance: unable to create " + "RuleBasedBreakIterator from rules (%s)", parse_error_str.c); + smart_str_free(&parse_error_str); + intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC); + efree(msg); + RETURN_NULL(); + } + } else { // compiled +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48 + rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status); + if (U_FAILURE(status)) { + intl_error_set(NULL, status, "rbbi_create_instance: unable to " + "create instance from compiled rules", 0 TSRMLS_CC); + RETURN_NULL(); + } +#else + intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: " + "compiled rules require ICU >= 4.8", 0 TSRMLS_CC); + RETURN_NULL(); +#endif + } + + breakiterator_object_create(return_value, rbbi TSRMLS_CC); +} + +U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct) +{ + zval orig_this = *getThis(); + + return_value = getThis(); + //changes this to IS_NULL (without first destroying) if there's an error + _php_intlgregcal_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU); + + if (Z_TYPE_P(return_value) == IS_NULL) { + zend_object_store_ctor_failed(&orig_this TSRMLS_CC); + zval_dtor(&orig_this); + } +} + +U_CFUNC PHP_FUNCTION(rbbi_get_rules) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_get_rules: bad 
arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + const UnicodeString rules = fetch_rbbi(bio)->getRules(); + + Z_TYPE_P(return_value) = IS_STRING; + if (intl_charFromString(rules, &Z_STRVAL_P(return_value), + &Z_STRLEN_P(return_value), BREAKITER_ERROR_CODE_P(bio)) == FAILURE) + { + intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio), + "rbbi_hash_code: Error converting result to UTF-8 string", + 0 TSRMLS_CC); + RETURN_FALSE; + } +} + +U_CFUNC PHP_FUNCTION(rbbi_get_rule_status) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_get_rule_status: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + RETURN_LONG(fetch_rbbi(bio)->getRuleStatus()); +} + +U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_get_rule_status_vec: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + int32_t num_rules = fetch_rbbi(bio)->getRuleStatusVec(NULL, 0, + BREAKITER_ERROR_CODE(bio)); + if (BREAKITER_ERROR_CODE(bio) == U_BUFFER_OVERFLOW_ERROR) { + BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR; + } else { + // should not happen + INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed " + " determining the number of status values"); + } + int32_t *rules = new int32_t[num_rules]; + num_rules = fetch_rbbi(bio)->getRuleStatusVec(rules, num_rules, + BREAKITER_ERROR_CODE(bio)); + if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) { + delete[] rules; + intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio), + "rbbi_get_rule_status_vec: failed obtaining the status values", + 0 TSRMLS_CC); + RETURN_FALSE; + } + + array_init_size(return_value, num_rules); + for (int32_t i = 0; i < num_rules; i++) { + 
add_next_index_long(return_value, rules[i]); + } + delete[] rules; +} + +#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48 +U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules) +{ + BREAKITER_METHOD_INIT_VARS; + object = getThis(); + + if (zend_parse_parameters_none() == FAILURE) { + intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, + "rbbi_get_binary_rules: bad arguments", 0 TSRMLS_CC); + RETURN_FALSE; + } + + BREAKITER_METHOD_FETCH_OBJECT; + + uint32_t rules_len; + const uint8_t *rules = fetch_rbbi(bio)->getBinaryRules(rules_len); + + if (rules_len > INT_MAX - 1) { + intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio), + "rbbi_get_binary_rules: the rules are too large", + 0 TSRMLS_CC); + RETURN_FALSE; + } + + char *ret_rules = static_cast<char*>(emalloc(rules_len + 1)); + memcpy(ret_rules, rules, rules_len); + ret_rules[rules_len] = '\0'; + + RETURN_STRINGL(ret_rules, rules_len, 0); +} +#endif diff --git a/ext/intl/breakiterator/rulebasedbreakiterator_methods.h b/ext/intl/breakiterator/rulebasedbreakiterator_methods.h new file mode 100644 index 0000000000..edea4ea2a6 --- /dev/null +++ b/ext/intl/breakiterator/rulebasedbreakiterator_methods.h @@ -0,0 +1,32 @@ +/* + +----------------------------------------------------------------------+ + | PHP Version 5 | + +----------------------------------------------------------------------+ + | This source file is subject to version 3.01 of the PHP license, | + | that is bundled with this package in the file LICENSE, and is | + | available through the world-wide-web at the following url: | + | http://www.php.net/license/3_01.txt | + | If you did not receive a copy of the PHP license and are unable to | + | obtain it through the world-wide-web, please send a note to | + | license@php.net so we can mail you a copy immediately. 
| + +----------------------------------------------------------------------+ + | Authors: Gustavo Lopes <cataphract@php.net> | + +----------------------------------------------------------------------+ + */ + +#ifndef RULEBASEDBREAKITERATOR_METHODS_H +#define RULEBASEDBREAKITERATOR_METHODS_H + +#include <php.h> + +PHP_METHOD(IntlRuleBasedBreakIterator, __construct); + +PHP_FUNCTION(rbbi_get_rules); + +PHP_FUNCTION(rbbi_get_rule_status); + +PHP_FUNCTION(rbbi_get_rule_status_vec); + +PHP_FUNCTION(rbbi_get_binary_rules); + +#endif
\ No newline at end of file diff --git a/ext/intl/common/common_enum.cpp b/ext/intl/common/common_enum.cpp index a0e346061a..da47a437a6 100644 --- a/ext/intl/common/common_enum.cpp +++ b/ext/intl/common/common_enum.cpp @@ -26,45 +26,14 @@ #include "common_enum.h" extern "C" { -#include "intl_error.h" -#include "intl_data.h" #include <zend_interfaces.h> #include <zend_exceptions.h> } -static zend_class_entry *IntlIterator_ce_ptr; -static zend_object_handlers IntlIterator_handlers; - -typedef struct { - zend_object zo; - intl_error err; - zend_object_iterator *iterator; -} IntlIterator_object; - -#define INTLITERATOR_ERROR(ii) (ii)->err -#define INTLITERATOR_ERROR_P(ii) &(INTLITERATOR_ERROR(ii)) - -#define INTLITERATOR_ERROR_CODE(ii) INTL_ERROR_CODE(INTLITERATOR_ERROR(ii)) -#define INTLITERATOR_ERROR_CODE_P(ii) &(INTL_ERROR_CODE(INTLITERATOR_ERROR(ii))) - -#define INTLITERATOR_METHOD_INIT_VARS INTL_METHOD_INIT_VARS(IntlIterator, ii) -#define INTLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK INTL_METHOD_FETCH_OBJECT(IntlIterator, ii) -#define INTLITERATOR_METHOD_FETCH_OBJECT\ - object = getThis(); \ - INTLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; \ - if (ii->iterator == NULL) { \ - intl_errors_set(&ii->err, U_ILLEGAL_ARGUMENT_ERROR, "Found unconstructed IntlIterator", 0 TSRMLS_CC); \ - RETURN_FALSE; \ - } - -typedef struct { - zend_object_iterator zoi; - zval *current; - zval *wrapping_obj; - void (*destroy_free_it)(zend_object_iterator *iterator TSRMLS_DC); -} zoi_with_current; +zend_class_entry *IntlIterator_ce_ptr; +zend_object_handlers IntlIterator_handlers; -static void zoi_with_current_dtor(zend_object_iterator *iter TSRMLS_DC) +void zoi_with_current_dtor(zend_object_iterator *iter TSRMLS_DC) { zoi_with_current *zoiwc = (zoi_with_current*)iter; @@ -84,22 +53,22 @@ static void zoi_with_current_dtor(zend_object_iterator *iter TSRMLS_DC) * function being called by the iterator wrapper destructor function and * not finding the memory of this iterator allocated anymore. 
*/ iter->funcs->invalidate_current(iter TSRMLS_CC); - zoiwc->destroy_free_it(iter TSRMLS_CC); + zoiwc->destroy_it(iter TSRMLS_CC); efree(iter); } } -static int zoi_with_current_valid(zend_object_iterator *iter TSRMLS_DC) +U_CFUNC int zoi_with_current_valid(zend_object_iterator *iter TSRMLS_DC) { return ((zoi_with_current*)iter)->current != NULL ? SUCCESS : FAILURE; } -static void zoi_with_current_get_current_data(zend_object_iterator *iter, zval ***data TSRMLS_DC) +U_CFUNC void zoi_with_current_get_current_data(zend_object_iterator *iter, zval ***data TSRMLS_DC) { *data = &((zoi_with_current*)iter)->current; } -static void zoi_with_current_invalidate_current(zend_object_iterator *iter TSRMLS_DC) +U_CFUNC void zoi_with_current_invalidate_current(zend_object_iterator *iter TSRMLS_DC) { zoi_with_current *zoi_iter = (zoi_with_current*)iter; if (zoi_iter->current) { @@ -155,7 +124,7 @@ static void string_enum_rewind(zend_object_iterator *iter TSRMLS_DC) } } -static void string_enum_destroy_free_it(zend_object_iterator *iter TSRMLS_DC) +static void string_enum_destroy_it(zend_object_iterator *iter TSRMLS_DC) { delete (StringEnumeration*)iter->data; } @@ -179,7 +148,7 @@ U_CFUNC void IntlIterator_from_StringEnumeration(StringEnumeration *se, zval *ob ii->iterator->data = (void*)se; ii->iterator->funcs = &string_enum_object_iterator_funcs; ii->iterator->index = 0; - ((zoi_with_current*)ii->iterator)->destroy_free_it = string_enum_destroy_free_it; + ((zoi_with_current*)ii->iterator)->destroy_it = string_enum_destroy_it; ((zoi_with_current*)ii->iterator)->wrapping_obj = object; ((zoi_with_current*)ii->iterator)->current = NULL; } @@ -331,7 +300,7 @@ static PHP_METHOD(IntlIterator, rewind) if (ii->iterator->funcs->rewind) { ii->iterator->funcs->rewind(ii->iterator TSRMLS_CC); } else { - intl_error_set(NULL, U_UNSUPPORTED_ERROR, + intl_errors_set(INTLITERATOR_ERROR_P(ii), U_UNSUPPORTED_ERROR, "IntlIterator::rewind: rewind not supported", 0 TSRMLS_CC); } } diff --git 
a/ext/intl/common/common_enum.h b/ext/intl/common/common_enum.h index f3c8bfcead..4c6abdb8f5 100644 --- a/ext/intl/common/common_enum.h +++ b/ext/intl/common/common_enum.h @@ -25,10 +25,49 @@ extern "C" { #include <math.h> #endif #include <php.h> +#include "../intl_error.h" +#include "../intl_data.h" #ifdef __cplusplus } #endif +#define INTLITERATOR_ERROR(ii) (ii)->err +#define INTLITERATOR_ERROR_P(ii) &(INTLITERATOR_ERROR(ii)) + +#define INTLITERATOR_ERROR_CODE(ii) INTL_ERROR_CODE(INTLITERATOR_ERROR(ii)) +#define INTLITERATOR_ERROR_CODE_P(ii) &(INTL_ERROR_CODE(INTLITERATOR_ERROR(ii))) + +#define INTLITERATOR_METHOD_INIT_VARS INTL_METHOD_INIT_VARS(IntlIterator, ii) +#define INTLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK INTL_METHOD_FETCH_OBJECT(IntlIterator, ii) +#define INTLITERATOR_METHOD_FETCH_OBJECT\ + object = getThis(); \ + INTLITERATOR_METHOD_FETCH_OBJECT_NO_CHECK; \ + if (ii->iterator == NULL) { \ + intl_errors_set(&ii->err, U_ILLEGAL_ARGUMENT_ERROR, "Found unconstructed IntlIterator", 0 TSRMLS_CC); \ + RETURN_FALSE; \ + } + +typedef struct { + zend_object zo; + intl_error err; + zend_object_iterator *iterator; +} IntlIterator_object; + +typedef struct { + zend_object_iterator zoi; + zval *current; + zval *wrapping_obj; + void (*destroy_it)(zend_object_iterator *iterator TSRMLS_DC); +} zoi_with_current; + +extern zend_class_entry *IntlIterator_ce_ptr; +extern zend_object_handlers IntlIterator_handlers; + +U_CFUNC void zoi_with_current_dtor(zend_object_iterator *iter TSRMLS_DC); +U_CFUNC int zoi_with_current_valid(zend_object_iterator *iter TSRMLS_DC); +U_CFUNC void zoi_with_current_get_current_data(zend_object_iterator *iter, zval ***data TSRMLS_DC); +U_CFUNC void zoi_with_current_invalidate_current(zend_object_iterator *iter TSRMLS_DC); + #ifdef __cplusplus U_CFUNC void IntlIterator_from_StringEnumeration(StringEnumeration *se, zval *object TSRMLS_DC); #endif diff --git a/ext/intl/config.m4 b/ext/intl/config.m4 index 431deeb7d2..d7eacbc0b4 100755 --- 
a/ext/intl/config.m4 +++ b/ext/intl/config.m4 @@ -75,6 +75,12 @@ if test "$PHP_INTL" != "no"; then calendar/calendar_class.cpp \ calendar/calendar_methods.cpp \ calendar/gregoriancalendar_methods.cpp \ + breakiterator/breakiterator_class.cpp \ + breakiterator/breakiterator_iterators.cpp \ + breakiterator/breakiterator_methods.cpp \ + breakiterator/rulebasedbreakiterator_methods.cpp \ + breakiterator/codepointiterator_internal.cpp \ + breakiterator/codepointiterator_methods.cpp \ idn/idn.c \ $icu_spoof_src, $ext_shared,,$ICU_INCS -Wno-write-strings) PHP_ADD_BUILD_DIR($ext_builddir/collator) @@ -91,4 +97,5 @@ if test "$PHP_INTL" != "no"; then PHP_ADD_BUILD_DIR($ext_builddir/calendar) PHP_ADD_BUILD_DIR($ext_builddir/idn) PHP_ADD_BUILD_DIR($ext_builddir/spoofchecker) + PHP_ADD_BUILD_DIR($ext_builddir/breakiterator) fi diff --git a/ext/intl/config.w32 b/ext/intl/config.w32 index 735749ab43..a223505f8c 100755 --- a/ext/intl/config.w32 +++ b/ext/intl/config.w32 @@ -102,6 +102,15 @@ if (PHP_INTL != "no") { gregoriancalendar_methods.cpp \ calendar_class.cpp", "intl"); + + ADD_SOURCES(configure_module_dirname + "/breakiterator", "\ + breakiterator_class.cpp \ + breakiterator_methods.cpp \ + breakiterator_iterators.cpp \ + rulebasedbreakiterator_methods.cpp \ + codepointiterator_internal.cpp \ + codepointiterator_methods.cpp ", + "intl"); ADD_FLAG("LIBS_INTL", "icudt.lib icuin.lib icuio.lib icule.lib iculx.lib"); AC_DEFINE("HAVE_INTL", 1, "Internationalization support enabled"); diff --git a/ext/intl/intl_error.c b/ext/intl/intl_error.c index 2c7066b081..99b1c6001c 100755 --- a/ext/intl/intl_error.c +++ b/ext/intl/intl_error.c @@ -25,6 +25,7 @@ #include "php_intl.h" #include "intl_error.h" +#include "intl_convert.h" ZEND_EXTERN_MODULE_GLOBALS( intl ) @@ -242,7 +243,82 @@ void intl_register_IntlException_class( TSRMLS_D ) default_exception_ce, NULL TSRMLS_CC ); IntlException_ce_ptr->create_object = default_exception_ce->create_object; } -/* }}} */ + +smart_str 
intl_parse_error_to_string( UParseError* pe ) +{ + smart_str ret = {0}; + char *buf; + int u8len; + UErrorCode status; + int any = 0; + + assert( pe != NULL ); + + smart_str_appends( &ret, "parse error " ); + if( pe->line > 0 ) + { + smart_str_appends( &ret, "on line " ); + smart_str_append_long( &ret, (long ) pe->line ); + any = 1; + } + if( pe->offset >= 0 ) { + if( any ) + smart_str_appends( &ret, ", " ); + else + smart_str_appends( &ret, "at " ); + + smart_str_appends( &ret, "offset " ); + smart_str_append_long( &ret, (long ) pe->offset ); + any = 1; + } + + if (pe->preContext[0] != 0 ) { + if( any ) + smart_str_appends( &ret, ", " ); + + smart_str_appends( &ret, "after \"" ); + intl_convert_utf16_to_utf8( &buf, &u8len, pe->preContext, -1, &status ); + if( U_FAILURE( status ) ) + { + smart_str_appends( &ret, "(could not convert parser error pre-context to UTF-8)" ); + } + else { + smart_str_appendl( &ret, buf, u8len ); + efree( buf ); + } + smart_str_appends( &ret, "\"" ); + any = 1; + } + + if( pe->postContext[0] != 0 ) + { + if( any ) + smart_str_appends( &ret, ", " ); + + smart_str_appends( &ret, "before or at \"" ); + intl_convert_utf16_to_utf8( &buf, &u8len, pe->postContext, -1, &status ); + if( U_FAILURE( status ) ) + { + smart_str_appends( &ret, "(could not convert parser error post-context to UTF-8)" ); + } + else + { + smart_str_appendl( &ret, buf, u8len ); + efree( buf ); + } + smart_str_appends( &ret, "\"" ); + any = 1; + } + + if( !any ) + { + smart_str_free( &ret ); + smart_str_appends( &ret, "no parse error" ); + } + + smart_str_0( &ret ); + return ret; +} /* * Local variables: diff --git a/ext/intl/intl_error.h b/ext/intl/intl_error.h index b5000a15de..4d8eb79327 100755 --- a/ext/intl/intl_error.h +++ b/ext/intl/intl_error.h @@ -20,6 +20,8 @@ #define INTL_ERROR_H #include <unicode/utypes.h> +#include <unicode/parseerr.h> +#include <ext/standard/php_smart_str.h> #define INTL_ERROR_CODE(e) (e).code @@ -44,6 +46,9 @@ void intl_errors_set_custom_msg( 
intl_error* err, char* msg, int copyMsg void intl_errors_set_code( intl_error* err, UErrorCode err_code TSRMLS_DC ); void intl_errors_set( intl_error* err, UErrorCode code, char* msg, int copyMsg TSRMLS_DC ); +// Other error helpers +smart_str intl_parse_error_to_string( UParseError* pe ); + // exported to be called on extension MINIT void intl_register_IntlException_class( TSRMLS_D ); diff --git a/ext/intl/php_intl.c b/ext/intl/php_intl.c index e0d1081514..19896a7108 100755 --- a/ext/intl/php_intl.c +++ b/ext/intl/php_intl.c @@ -78,6 +78,9 @@ #include "calendar/calendar_methods.h" #include "calendar/gregoriancalendar_methods.h" +#include "breakiterator/breakiterator_class.h" +#include "breakiterator/breakiterator_iterators.h" + #include "idn/idn.h" #if U_ICU_VERSION_MAJOR_NUM > 3 && U_ICU_VERSION_MINOR_NUM >=2 @@ -966,6 +969,12 @@ PHP_MINIT_FUNCTION( intl ) /* Register 'IntlIterator' PHP class */ intl_register_IntlIterator_class( TSRMLS_C ); + /* Register 'BreakIterator' class */ + breakiterator_register_BreakIterator_class( TSRMLS_C ); + + /* Register 'IntlPartsIterator' class */ + breakiterator_register_IntlPartsIterator_class( TSRMLS_C ); + /* Global error handling. 
*/ intl_error_init( NULL TSRMLS_CC ); diff --git a/ext/intl/tests/breakiter___construct.phpt b/ext/intl/tests/breakiter___construct.phpt new file mode 100644 index 0000000000..9ea6a9cf11 --- /dev/null +++ b/ext/intl/tests/breakiter___construct.phpt @@ -0,0 +1,13 @@ +--TEST-- +IntlBreakIterator::__construct() should not be callable +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +new IntlBreakIterator(); +--EXPECTF-- + +Fatal error: Call to private IntlBreakIterator::__construct() from invalid context in %s on line %d diff --git a/ext/intl/tests/breakiter___construct_error.phpt b/ext/intl/tests/breakiter___construct_error.phpt new file mode 100644 index 0000000000..770f1403c7 --- /dev/null +++ b/ext/intl/tests/breakiter___construct_error.phpt @@ -0,0 +1,38 @@ +--TEST-- +IntlRuleBasedBreakIterator::__construct(): arg errors +--SKIPIF-- +<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?> +<?php if(version_compare(INTL_ICU_VERSION, '4.8') < 0) print 'skip ICU >= 4.8 only'; ?> +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +//missing ; at the end: +var_dump(new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+')); +var_dump(new IntlRuleBasedBreakIterator()); +var_dump(new IntlRuleBasedBreakIterator(1,2,3)); +var_dump(new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;', array())); +var_dump(new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;', true)); + +--EXPECTF-- + +Warning: IntlRuleBasedBreakIterator::__construct(): rbbi_create_instance: unable to create RuleBasedBreakIterator from rules (parse error on line 1, offset 31) in %s on line %d +NULL + +Warning: IntlRuleBasedBreakIterator::__construct() expects at least 1 parameter, 0 given in %s on line %d + +Warning: IntlRuleBasedBreakIterator::__construct(): rbbi_create_instance: bad arguments in %s on line %d +NULL + +Warning: IntlRuleBasedBreakIterator::__construct()
expects at most 2 parameters, 3 given in %s on line %d + +Warning: IntlRuleBasedBreakIterator::__construct(): rbbi_create_instance: bad arguments in %s on line %d +NULL + +Warning: IntlRuleBasedBreakIterator::__construct() expects parameter 2 to be boolean, array given in %s on line %d + +Warning: IntlRuleBasedBreakIterator::__construct(): rbbi_create_instance: bad arguments in %s on line %d +NULL + +Warning: IntlRuleBasedBreakIterator::__construct(): rbbi_create_instance: unable to create instance from compiled rules in %s on line %d +NULL diff --git a/ext/intl/tests/breakiter_clone_basic.phpt b/ext/intl/tests/breakiter_clone_basic.phpt new file mode 100644 index 0000000000..f5bcefc6f7 --- /dev/null +++ b/ext/intl/tests/breakiter_clone_basic.phpt @@ -0,0 +1,23 @@ +--TEST-- +IntlBreakIterator: clone handler +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;'); +$bi_clone = clone $bi; +var_dump(get_class($bi), get_class($bi_clone)); +var_dump($bi == $bi_clone); + +$bi->setText('foobar'); +$bi_clone = clone $bi; +var_dump(get_class($bi), get_class($bi_clone)); +var_dump($bi == $bi_clone); + +--EXPECT-- +string(26) "IntlRuleBasedBreakIterator" +string(26) "IntlRuleBasedBreakIterator" +bool(true) +string(26) "IntlRuleBasedBreakIterator" +string(26) "IntlRuleBasedBreakIterator" +bool(true) diff --git a/ext/intl/tests/breakiter_createCodePointInstance_basic.phpt b/ext/intl/tests/breakiter_createCodePointInstance_basic.phpt new file mode 100644 index 0000000000..a43e82760c --- /dev/null +++ b/ext/intl/tests/breakiter_createCodePointInstance_basic.phpt @@ -0,0 +1,43 @@ +--TEST-- +IntlBreakIterator::createCodePointInstance(): basic test +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$text = 'ตัวอย่างข้อความ'; + +$codepoint_it = 
IntlBreakIterator::createCodePointInstance(); +var_dump(get_class($codepoint_it)); +$codepoint_it->setText($text); + +print_r(iterator_to_array($codepoint_it)); + +?> +==DONE== +--EXPECT-- +string(26) "IntlCodePointBreakIterator" +Array +( + [0] => 0 + [1] => 3 + [2] => 6 + [3] => 9 + [4] => 12 + [5] => 15 + [6] => 18 + [7] => 21 + [8] => 24 + [9] => 27 + [10] => 30 + [11] => 33 + [12] => 36 + [13] => 39 + [14] => 42 + [15] => 45 +) +==DONE== diff --git a/ext/intl/tests/breakiter_createCodePointInstance_error.phpt b/ext/intl/tests/breakiter_createCodePointInstance_error.phpt new file mode 100644 index 0000000000..90228e128f --- /dev/null +++ b/ext/intl/tests/breakiter_createCodePointInstance_error.phpt @@ -0,0 +1,18 @@ +--TEST-- +IntlBreakIterator::createCodePointInstance(): bad arguments +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +var_dump(IntlBreakIterator::createCodePointInstance(array())); +--EXPECTF-- + +Warning: IntlBreakIterator::createCodePointInstance() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::createCodePointInstance(): breakiter_create_code_point_instance: bad arguments in %s on line %d +NULL + diff --git a/ext/intl/tests/breakiter_current_basic.phpt b/ext/intl/tests/breakiter_current_basic.phpt new file mode 100644 index 0000000000..515cb555bb --- /dev/null +++ b/ext/intl/tests/breakiter_current_basic.phpt @@ -0,0 +1,24 @@ +--TEST-- +IntlBreakIterator::current(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +var_dump($bi->current()); +$bi->setText('foo bar trans zoo bee'); + +var_dump($bi->first()); +var_dump($bi->current()); +var_dump($bi->next()); +var_dump($bi->current()); +?> +==DONE== +--EXPECT-- +int(0) +int(0) +int(0) +int(3) +int(3) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_factories_basic.phpt b/ext/intl/tests/breakiter_factories_basic.phpt new file mode 100644 index 0000000000..333023a253 --- /dev/null +++ b/ext/intl/tests/breakiter_factories_basic.phpt @@ -0,0 +1,45 @@ +--TEST-- +IntlBreakIterator factories: basic tests +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "ja"); + +$m = array('createWordInstance', 'createLineInstance', 'createCharacterInstance', + 'createSentenceInstance', 'createTitleInstance'); + +$t = 'Frase 1... Frase 2'; + +$o1 = $o2 = null; +foreach ($m as $method) { + echo "===== $method =====\n"; + $o1 = call_user_func(array('IntlBreakIterator', $method), 'ja'); + var_dump($o1 == $o2); + $o2 = call_user_func(array('IntlBreakIterator', $method), NULL); + var_dump($o1 == $o2); + echo "\n"; +} +--EXPECT-- +===== createWordInstance ===== +bool(false) +bool(true) + +===== createLineInstance ===== +bool(false) +bool(true) + +===== createCharacterInstance ===== +bool(false) +bool(true) + +===== createSentenceInstance ===== +bool(false) +bool(true) + +===== createTitleInstance ===== +bool(false) +bool(true) + diff --git a/ext/intl/tests/breakiter_factories_error.phpt b/ext/intl/tests/breakiter_factories_error.phpt new file mode 100644 index 0000000000..6001946ad2 --- /dev/null +++ b/ext/intl/tests/breakiter_factories_error.phpt @@ -0,0 +1,39 @@ +--TEST-- +IntlBreakIterator factory methods: argument errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +var_dump(IntlBreakIterator::createWordInstance(array())); +var_dump(IntlBreakIterator::createSentenceInstance(NULL, 2)); +var_dump(IntlBreakIterator::createCharacterInstance(NULL, 2)); +var_dump(IntlBreakIterator::createTitleInstance(NULL, 2)); +var_dump(IntlBreakIterator::createLineInstance(NULL, 2)); + + +--EXPECTF-- + +Warning:
IntlBreakIterator::createWordInstance() expects parameter 1 to be string, array given in %s on line %d + +Warning: IntlBreakIterator::createWordInstance(): breakiter_create_word_instance: bad arguments in %s on line %d +NULL + +Warning: IntlBreakIterator::createSentenceInstance() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::createSentenceInstance(): breakiter_create_sentence_instance: bad arguments in %s on line %d +NULL + +Warning: IntlBreakIterator::createCharacterInstance() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::createCharacterInstance(): breakiter_create_character_instance: bad arguments in %s on line %d +NULL + +Warning: IntlBreakIterator::createTitleInstance() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::createTitleInstance(): breakiter_create_title_instance: bad arguments in %s on line %d +NULL + +Warning: IntlBreakIterator::createLineInstance() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::createLineInstance(): breakiter_create_line_instance: bad arguments in %s on line %d +NULL diff --git a/ext/intl/tests/breakiter_first_basic.phpt b/ext/intl/tests/breakiter_first_basic.phpt new file mode 100644 index 0000000000..c8427adc6c --- /dev/null +++ b/ext/intl/tests/breakiter_first_basic.phpt @@ -0,0 +1,21 @@ +--TEST-- +IntlBreakIterator::first(): basic test +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans'); + +var_dump($bi->current()); +var_dump($bi->next()); +var_dump($bi->first()); +var_dump($bi->current()); +--EXPECT-- +int(0) +int(3) +int(0) +int(0) diff --git a/ext/intl/tests/breakiter_first_last_previous_current_error.phpt b/ext/intl/tests/breakiter_first_last_previous_current_error.phpt new file mode 100644 index
0000000000..9865cdec58 --- /dev/null +++ b/ext/intl/tests/breakiter_first_last_previous_current_error.phpt @@ -0,0 +1,35 @@ +--TEST-- +IntlBreakIterator::first()/last()/previous()/current(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;'); +$bi->setText("\x80sdfé\x90d888 dfsa9"); + +var_dump($bi->first(1)); +var_dump($bi->last(1)); +var_dump($bi->previous(1)); +var_dump($bi->current(1)); + +--EXPECTF-- + +Warning: IntlBreakIterator::first() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::first(): breakiter_first: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::last() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::last(): breakiter_last: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::previous() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::previous(): breakiter_previous: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::current() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::current(): breakiter_current: bad arguments in %s on line %d +bool(false) diff --git a/ext/intl/tests/breakiter_following_basic.phpt b/ext/intl/tests/breakiter_following_basic.phpt new file mode 100644 index 0000000000..967ccafb62 --- /dev/null +++ b/ext/intl/tests/breakiter_following_basic.phpt @@ -0,0 +1,20 @@ +--TEST-- +IntlBreakIterator::following(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans zoo bee'); + +var_dump($bi->following(5)); +var_dump($bi->following(50)); +var_dump($bi->following(-1)); +?> +==DONE== +--EXPECT-- +int(7) +int(-1) +int(0) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_following_preceding_isBoundary_error.phpt b/ext/intl/tests/breakiter_following_preceding_isBoundary_error.phpt new file mode 100644 index 0000000000..a4b60857ab --- /dev/null +++ b/ext/intl/tests/breakiter_following_preceding_isBoundary_error.phpt @@ -0,0 +1,47 @@ +--TEST-- +IntlBreakIterator::following()/preceding()/isBoundary(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;'); +$bi->setText("\x80sdfé\x90d888 dfsa9"); + +var_dump($bi->following(1, 2)); +var_dump($bi->following(array())); +var_dump($bi->preceding(1, 2)); +var_dump($bi->preceding(array())); +var_dump($bi->isBoundary(1, 2)); +var_dump($bi->isBoundary(array())); + +--EXPECTF-- + +Warning: IntlBreakIterator::following() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::following(): breakiter_following: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::following() expects parameter 1 to be long, array given in %s on line %d + +Warning: IntlBreakIterator::following(): breakiter_following: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::preceding() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::preceding(): breakiter_preceding: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::preceding() expects parameter 1 to be long, array given in %s on line %d + +Warning: IntlBreakIterator::preceding(): breakiter_preceding: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::isBoundary() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::isBoundary(): breakiter_is_boundary: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::isBoundary() expects parameter 1 to be long, array given in %s on line %d + +Warning: 
IntlBreakIterator::isBoundary(): breakiter_is_boundary: bad arguments in %s on line %d +bool(false) diff --git a/ext/intl/tests/breakiter_getLocale_basic.phpt b/ext/intl/tests/breakiter_getLocale_basic.phpt new file mode 100644 index 0000000000..499316c169 --- /dev/null +++ b/ext/intl/tests/breakiter_getLocale_basic.phpt @@ -0,0 +1,17 @@ +--TEST-- +IntlBreakIterator::getLocale(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createSentenceInstance('pt'); + +var_dump($bi->getLocale(0)); +var_dump($bi->getLocale(1)); +?> +==DONE== +--EXPECT-- +string(4) "root" +string(4) "root" +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_getLocale_error.phpt b/ext/intl/tests/breakiter_getLocale_error.phpt new file mode 100644 index 0000000000..1dec56db08 --- /dev/null +++ b/ext/intl/tests/breakiter_getLocale_error.phpt @@ -0,0 +1,29 @@ +--TEST-- +IntlBreakIterator::getLocale(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;'); +$bi->setText("\x80sdfé\x90d888 dfsa9"); + +var_dump($bi->getLocale(1, 2)); +var_dump($bi->getLocale(array())); +var_dump($bi->getLocale()); + +--EXPECTF-- + +Warning: IntlBreakIterator::getLocale() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::getLocale(): breakiter_get_locale: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::getLocale() expects parameter 1 to be long, array given in %s on line %d + +Warning: IntlBreakIterator::getLocale(): breakiter_get_locale: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::getLocale() expects exactly 1 parameter, 0 given in %s on line %d + +Warning: IntlBreakIterator::getLocale(): breakiter_get_locale: bad arguments in %s on line %d +bool(false) diff --git a/ext/intl/tests/breakiter_getPartsIterator_basic.phpt b/ext/intl/tests/breakiter_getPartsIterator_basic.phpt new file mode 100644 index 0000000000..794bab3014 --- /dev/null +++ b/ext/intl/tests/breakiter_getPartsIterator_basic.phpt @@ -0,0 +1,33 @@ +--TEST-- +IntlBreakIterator::getPartsIterator(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$pi = $bi->getPartsIterator(); +var_dump(get_class($pi)); +print_r(iterator_to_array($pi)); + +$bi->setText("foo bar"); +$pi = $bi->getPartsIterator(); +var_dump(get_class($pi->getBreakIterator())); +print_r(iterator_to_array($pi)); +var_dump($pi->getRuleStatus()); +?> +==DONE== 
+--EXPECT-- +string(17) "IntlPartsIterator" +Array +( +) +string(26) "IntlRuleBasedBreakIterator" +Array +( + [0] => foo + [1] => + [2] => bar +) +int(0) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_getPartsIterator_error.phpt b/ext/intl/tests/breakiter_getPartsIterator_error.phpt new file mode 100644 index 0000000000..9737618033 --- /dev/null +++ b/ext/intl/tests/breakiter_getPartsIterator_error.phpt @@ -0,0 +1,33 @@ +--TEST-- +IntlBreakIterator::getPartsIterator(): bad args +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$it = IntlBreakIterator::createWordInstance(NULL); +var_dump($it->getPartsIterator(array())); +var_dump($it->getPartsIterator(1, 2)); +var_dump($it->getPartsIterator(-1)); + +?> +==DONE== +--EXPECTF-- + +Warning: IntlBreakIterator::getPartsIterator() expects parameter 1 to be long, array given in %s on line %d + +Warning: IntlBreakIterator::getPartsIterator(): breakiter_get_parts_iterator: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::getPartsIterator() expects at most 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::getPartsIterator(): breakiter_get_parts_iterator: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::getPartsIterator(): breakiter_get_parts_iterator: bad key type in %s on line %d +bool(false) +==DONE== diff --git a/ext/intl/tests/breakiter_getPartsIterator_var1.phpt b/ext/intl/tests/breakiter_getPartsIterator_var1.phpt new file mode 100644 index 0000000000..7bbd27ea45 --- /dev/null +++ b/ext/intl/tests/breakiter_getPartsIterator_var1.phpt @@ -0,0 +1,60 @@ +--TEST-- +IntlBreakIterator::getPartsIterator(): argument variations +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$text = 'foo bar tao'; + +$it = IntlBreakIterator::createWordInstance(NULL); +$it->setText($text); + 
+var_dump(iterator_to_array($it->getPartsIterator(IntlPartsIterator::KEY_SEQUENTIAL))); +var_dump(iterator_to_array($it->getPartsIterator(IntlPartsIterator::KEY_LEFT))); +var_dump(iterator_to_array($it->getPartsIterator(IntlPartsIterator::KEY_RIGHT))); + +?> +==DONE== +--EXPECT-- +array(5) { + [0]=> + string(3) "foo" + [1]=> + string(1) " " + [2]=> + string(3) "bar" + [3]=> + string(1) " " + [4]=> + string(3) "tao" +} +array(5) { + [0]=> + string(3) "foo" + [4]=> + string(1) " " + [5]=> + string(3) "bar" + [8]=> + string(1) " " + [9]=> + string(3) "tao" +} +array(5) { + [3]=> + string(3) "foo" + [5]=> + string(1) " " + [8]=> + string(3) "bar" + [9]=> + string(1) " " + [12]=> + string(3) "tao" +} +==DONE== diff --git a/ext/intl/tests/breakiter_getText_basic.phpt b/ext/intl/tests/breakiter_getText_basic.phpt new file mode 100644 index 0000000000..57f3e32aa3 --- /dev/null +++ b/ext/intl/tests/breakiter_getText_basic.phpt @@ -0,0 +1,16 @@ +--TEST-- +IntlBreakIterator::getText(): basic test +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = IntlBreakIterator::createWordInstance('pt'); +var_dump($bi->getText()); +$bi->setText('foo bar'); +var_dump($bi->getText()); +--EXPECTF-- +NULL +string(7) "foo bar" diff --git a/ext/intl/tests/breakiter_getText_error.phpt b/ext/intl/tests/breakiter_getText_error.phpt new file mode 100644 index 0000000000..f222002374 --- /dev/null +++ b/ext/intl/tests/breakiter_getText_error.phpt @@ -0,0 +1,15 @@ +--TEST-- +IntlBreakIterator::getText(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}]+;'); +var_dump($bi->getText(array())); + +--EXPECTF-- + +Warning: IntlBreakIterator::getText() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlBreakIterator::getText(): breakiter_get_text: bad arguments in %s on line %d +bool(false) diff --git 
a/ext/intl/tests/breakiter_isBoundary_basic.phpt b/ext/intl/tests/breakiter_isBoundary_basic.phpt new file mode 100644 index 0000000000..87d8227352 --- /dev/null +++ b/ext/intl/tests/breakiter_isBoundary_basic.phpt @@ -0,0 +1,24 @@ +--TEST-- +IntlBreakIterator::isBoundary(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans zoo bee'); + +var_dump($bi->isBoundary(0)); +var_dump($bi->isBoundary(7)); +var_dump($bi->isBoundary(-1)); +var_dump($bi->isBoundary(1)); +var_dump($bi->isBoundary(50)); +?> +==DONE== +--EXPECT-- +bool(true) +bool(true) +bool(false) +bool(false) +bool(false) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_last_basic.phpt b/ext/intl/tests/breakiter_last_basic.phpt new file mode 100644 index 0000000000..0d3aead232 --- /dev/null +++ b/ext/intl/tests/breakiter_last_basic.phpt @@ -0,0 +1,19 @@ +--TEST-- +IntlBreakIterator::last(): basic test +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans'); + +var_dump($bi->current()); +var_dump($bi->last()); +var_dump($bi->current()); +--EXPECTF-- +int(0) +int(13) +int(13) diff --git a/ext/intl/tests/breakiter_next_basic.phpt b/ext/intl/tests/breakiter_next_basic.phpt new file mode 100644 index 0000000000..3d535443b8 --- /dev/null +++ b/ext/intl/tests/breakiter_next_basic.phpt @@ -0,0 +1,26 @@ +--TEST-- +IntlBreakIterator::next(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans zoo bee'); + +var_dump($bi->first()); +var_dump($bi->next()); +var_dump($bi->next(2)); +var_dump($bi->next(-1)); +var_dump($bi->next(0)); +var_dump($bi->next(NULL)); +?> +==DONE== +--EXPECT-- +int(0) +int(3) +int(7) +int(4) +int(4) +int(7) +==DONE== diff --git a/ext/intl/tests/breakiter_next_error.phpt b/ext/intl/tests/breakiter_next_error.phpt new file mode 100644 index 0000000000..40d8f5813b --- /dev/null +++ b/ext/intl/tests/breakiter_next_error.phpt @@ -0,0 +1,23 @@ +--TEST-- +IntlBreakIterator::next(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}\uFFFD]+;[:number:]+;'); +$bi->setText("\x80sdfé\x90d888 dfsa9"); + +var_dump($bi->next(1, 2)); +var_dump($bi->next(array())); + +--EXPECTF-- + +Warning: IntlBreakIterator::next() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: 
IntlBreakIterator::next(): breakiter_next: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::next() expects parameter 1 to be long, array given in %s on line %d + +Warning: IntlBreakIterator::next(): breakiter_next: bad arguments in %s on line %d +bool(false) diff --git a/ext/intl/tests/breakiter_preceding_basic.phpt b/ext/intl/tests/breakiter_preceding_basic.phpt new file mode 100644 index 0000000000..60695209cc --- /dev/null +++ b/ext/intl/tests/breakiter_preceding_basic.phpt @@ -0,0 +1,20 @@ +--TEST-- +IntlBreakIterator::preceding(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans zoo bee'); + +var_dump($bi->preceding(5)); +var_dump($bi->preceding(50)); +var_dump($bi->preceding(-1)); +?> +==DONE== +--EXPECT-- +int(4) +int(21) +int(0) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_previous_basic.phpt b/ext/intl/tests/breakiter_previous_basic.phpt new file mode 100644 index 0000000000..6d4f3bce5f --- /dev/null +++ b/ext/intl/tests/breakiter_previous_basic.phpt @@ -0,0 +1,18 @@ +--TEST-- +IntlBreakIterator::previous(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$bi = IntlBreakIterator::createWordInstance('pt'); +$bi->setText('foo bar trans'); + +var_dump($bi->last()); +var_dump($bi->previous()); +?> +==DONE== +--EXPECT-- +int(13) +int(8) +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/breakiter_setText_basic.phpt b/ext/intl/tests/breakiter_setText_basic.phpt new file mode 100644 index 0000000000..7b3fa2a6e5 --- /dev/null +++ b/ext/intl/tests/breakiter_setText_basic.phpt @@ -0,0 +1,35 @@ +--TEST-- +IntlBreakIterator::setText(): basic test +--SKIPIF-- +<?php if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +class A { +function __tostring() { return 'aaa'; } +} + +$bi = IntlBreakIterator::createWordInstance('pt'); +var_dump($bi->setText('foo bar')); +var_dump($bi->getText()); +var_dump($bi->setText(1)); +var_dump($bi->getText()); +var_dump($bi->setText(new A)); +var_dump($bi->getText()); + +/* setText resets the pointer */ +var_dump($bi->next()); +var_dump($bi->setText('foo bar')); +var_dump($bi->current()); +--EXPECT-- +bool(true) +string(7) "foo bar" +bool(true) +string(1) "1" +bool(true) +string(3) "aaa" +int(3) +bool(true) +int(0) diff --git a/ext/intl/tests/breakiter_setText_error.phpt b/ext/intl/tests/breakiter_setText_error.phpt new file mode 100644 index 0000000000..bfcda8ddaa --- /dev/null +++ b/ext/intl/tests/breakiter_setText_error.phpt @@ -0,0 +1,40 @@ +--TEST-- +IntlBreakIterator::setText(): arg errors +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$bi = new IntlRuleBasedBreakIterator('[\p{Letter}]+;'); +var_dump($bi->setText()); +var_dump($bi->setText(array())); +var_dump($bi->setText(1,2)); + +class A { +function __destruct() { var_dump('destructed'); throw new Exception('e'); } +function __tostring() { return 'foo'; } +} + +try { +var_dump($bi->setText(new A)); +} catch (Exception $e) { +var_dump($e->getMessage()); +} + +--EXPECTF-- + +Warning: IntlBreakIterator::setText() expects exactly 1 parameter, 0 given in %s on line %d + +Warning: IntlBreakIterator::setText(): breakiter_set_text: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::setText() expects parameter 
1 to be string, array given in %s on line %d + +Warning: IntlBreakIterator::setText(): breakiter_set_text: bad arguments in %s on line %d +bool(false) + +Warning: IntlBreakIterator::setText() expects exactly 1 parameter, 2 given in %s on line %d + +Warning: IntlBreakIterator::setText(): breakiter_set_text: bad arguments in %s on line %d +bool(false) +string(10) "destructed" +string(1) "e" diff --git a/ext/intl/tests/cpbi_clone_equality.phpt b/ext/intl/tests/cpbi_clone_equality.phpt new file mode 100644 index 0000000000..c62b452747 --- /dev/null +++ b/ext/intl/tests/cpbi_clone_equality.phpt @@ -0,0 +1,33 @@ +--TEST-- +IntlCodePointBreakIterator: clone and equality +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$text = 'ตัวอย่างข้อความ'; +$text2 = 'foo'; + +$it = IntlBreakIterator::createCodePointInstance(); +$it->setText($text); + +$it_clone = clone $it; +var_dump($it == $it_clone); + +$it->setText($text2 ); +var_dump($it == $it_clone); + +$it_clone->setText($text2); +var_dump($it == $it_clone); + +?> +==DONE== +--EXPECT-- +bool(true) +bool(false) +bool(true) +==DONE== diff --git a/ext/intl/tests/cpbi_getLastCodePoint_basic.phpt b/ext/intl/tests/cpbi_getLastCodePoint_basic.phpt new file mode 100644 index 0000000000..74a07a6292 --- /dev/null +++ b/ext/intl/tests/cpbi_getLastCodePoint_basic.phpt @@ -0,0 +1,82 @@ +--TEST-- +IntlCodepointBreakIterator::getLastCodePoint(): basic test +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$text = 'ตัวอย่างข้อความ'; + +$codepoint_it = IntlBreakIterator::createCodePointInstance(); +$codepoint_it->setText($text); + +var_dump($codepoint_it->getLastCodePoint()); +//first() and last() don't read codepoint and set the last code point var to -1 
+//The pointer is after the last read codepoint if moving forward and +//before the last read codepoint if moving backwards +$p = $codepoint_it->first(); +while ($p != IntlBreakIterator::DONE) { + $c = $codepoint_it->getLastCodePoint(); + if ($c > 0) + var_dump(sprintf('U+%04X', $codepoint_it->getLastCodePoint())); + else + var_dump($c); + //it's a post-increment operation as to the codepoint, i.e., it gives the codepoint + //starting at the initial position and only then moves the pointer forward + $p = $codepoint_it->next(); +} + +echo "Now backwards\n"; +$p = $codepoint_it->last(); +while ($p != IntlBreakIterator::DONE) { + $c = $codepoint_it->getLastCodePoint(); + if ($c > 0) + var_dump(sprintf('U+%04X', $codepoint_it->getLastCodePoint())); + else + var_dump($c); + $p = $codepoint_it->previous(); +} + + +?> +==DONE== +--EXPECT-- +int(-1) +int(-1) +string(6) "U+0E15" +string(6) "U+0E31" +string(6) "U+0E27" +string(6) "U+0E2D" +string(6) "U+0E22" +string(6) "U+0E48" +string(6) "U+0E32" +string(6) "U+0E07" +string(6) "U+0E02" +string(6) "U+0E49" +string(6) "U+0E2D" +string(6) "U+0E04" +string(6) "U+0E27" +string(6) "U+0E32" +string(6) "U+0E21" +Now backwards +int(-1) +string(6) "U+0E21" +string(6) "U+0E32" +string(6) "U+0E27" +string(6) "U+0E04" +string(6) "U+0E2D" +string(6) "U+0E49" +string(6) "U+0E02" +string(6) "U+0E07" +string(6) "U+0E32" +string(6) "U+0E48" +string(6) "U+0E22" +string(6) "U+0E2D" +string(6) "U+0E27" +string(6) "U+0E31" +string(6) "U+0E15" +==DONE== diff --git a/ext/intl/tests/cpbi_getLastCodePoint_error.phpt b/ext/intl/tests/cpbi_getLastCodePoint_error.phpt new file mode 100644 index 0000000000..78bd216629 --- /dev/null +++ b/ext/intl/tests/cpbi_getLastCodePoint_error.phpt @@ -0,0 +1,19 @@ +--TEST-- +IntlCodePointBreakIterator::getLastCodePoint(): bad args +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); + +$it = 
IntlBreakIterator::createCodePointInstance(); +var_dump($it->getLastCodePoint(array())); +--EXPECTF-- + +Warning: IntlCodePointBreakIterator::getLastCodePoint() expects exactly 0 parameters, 1 given in %s on line %d + +Warning: IntlCodePointBreakIterator::getLastCodePoint(): cpbi_get_last_code_point: bad arguments in %s on line %d +bool(false) + diff --git a/ext/intl/tests/cpbi_parts_iterator.phpt b/ext/intl/tests/cpbi_parts_iterator.phpt new file mode 100644 index 0000000000..4754c12371 --- /dev/null +++ b/ext/intl/tests/cpbi_parts_iterator.phpt @@ -0,0 +1,40 @@ +--TEST-- +IntlCodepointBreakIterator's part iterator +--SKIPIF-- +<?php +if (!extension_loaded('intl')) + die('skip intl extension not enabled'); +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$text = 'ตัวอย่างข้อความ'; + +$it = IntlBreakIterator::createCodePointInstance()->getPartsIterator(); +$it->getBreakIterator()->setText($text); + +foreach ($it as $k => $v) { + echo "$k. $v (" . sprintf("U+%04X", $it->getBreakIterator()->getLastCodePoint()) . + ") at {$it->getBreakIterator()->current()}\r\n"; +} + +?> +==DONE== +--EXPECT-- +0. ต (U+0E15) at 3 +1. ั (U+0E31) at 6 +2. ว (U+0E27) at 9 +3. อ (U+0E2D) at 12 +4. ย (U+0E22) at 15 +5. ่ (U+0E48) at 18 +6. า (U+0E32) at 21 +7. ง (U+0E07) at 24 +8. ข (U+0E02) at 27 +9. ้ (U+0E49) at 30 +10. อ (U+0E2D) at 33 +11. ค (U+0E04) at 36 +12. ว (U+0E27) at 39 +13. า (U+0E32) at 42 +14. 
ม (U+0E21) at 45 +==DONE== diff --git a/ext/intl/tests/rbbiter___construct_basic.phpt b/ext/intl/tests/rbbiter___construct_basic.phpt new file mode 100644 index 0000000000..2b14d826e3 --- /dev/null +++ b/ext/intl/tests/rbbiter___construct_basic.phpt @@ -0,0 +1,27 @@ +--TEST-- +IntlRuleBasedBreakIterator::__construct: basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$rules = <<<RULES +\$LN = [[:letter:] [:number:]]; +\$S = [.;,:]; + +!!forward; +\$LN+ {1}; +\$S+ {42}; +!!reverse; +\$LN+ {1}; +\$S+ {42}; +!!safe_forward; +!!safe_reverse; +RULES; +$rbbi = new IntlRuleBasedBreakIterator($rules); +var_dump(get_class($rbbi)); +?> +==DONE== +--EXPECT-- +string(26) "IntlRuleBasedBreakIterator" +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/rbbiter_getBinaryRules_basic.phpt b/ext/intl/tests/rbbiter_getBinaryRules_basic.phpt new file mode 100644 index 0000000000..dce0714d4d --- /dev/null +++ b/ext/intl/tests/rbbiter_getBinaryRules_basic.phpt @@ -0,0 +1,39 @@ +--TEST-- +IntlRuleBasedBreakIterator::getBinaryRules(): basic test +--SKIPIF-- +<?php if( !extension_loaded( 'intl' ) ) print 'skip'; ?> +<?php if(version_compare(INTL_ICU_VERSION, '4.8') < 0) print 'skip ICU >= 4.8 only'; ?> +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$rules = <<<RULES +\$LN = [[:letter:] [:number:]]; +\$S = [.;,:]; + +!!forward; +\$LN+ {1}; +\$S+ {42}; +!!reverse; +\$LN+ {1}; +\$S+ {42}; +!!safe_forward; +!!safe_reverse; +RULES; +$rbbi = new IntlRuleBasedBreakIterator($rules); +$rbbi->setText('sdfkjsdf88á.... ,;');; + +$br = $rbbi->getBinaryRules(); + +$rbbi2 = new IntlRuleBasedBreakIterator($br, true); + +var_dump($rbbi->getRules(), $rbbi2->getRules()); +var_dump($rbbi->getRules() == $rbbi2->getRules()); +?> +==DONE== +--EXPECT-- +string(128) "$LN = [[:letter:] [:number:]];$S = [.;,:];!!forward;$LN+ {1};$S+ {42};!!reverse;$LN+ {1};$S+ {42};!!safe_forward;!!safe_reverse;" +string(128) "$LN = [[:letter:] [:number:]];$S = [.;,:];!!forward;$LN+ {1};$S+ {42};!!reverse;$LN+ {1};$S+ {42};!!safe_forward;!!safe_reverse;" +bool(true) +==DONE== diff --git a/ext/intl/tests/rbbiter_getRuleStatusVec_basic.phpt b/ext/intl/tests/rbbiter_getRuleStatusVec_basic.phpt new file mode 100644 index 0000000000..a4f3352f9a --- /dev/null +++ b/ext/intl/tests/rbbiter_getRuleStatusVec_basic.phpt @@ -0,0 +1,55 @@ +--TEST-- +IntlRuleBasedBreakIterator::getRuleStatusVec(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$rules = <<<RULES +\$LN = [[:letter:] [:number:]]; +\$S = [.;,:]; + +!!forward; +\$LN+ {1}; +[^.]+ {4}; +\$S+ {42}; +!!reverse; +\$LN+ {1}; +[^.]+ {4}; +\$S+ {42}; 
+!!safe_forward; +!!safe_reverse; +RULES; +$rbbi = new IntlRuleBasedBreakIterator($rules); +$rbbi->setText('sdfkjsdf88á.... ,;');; + +do { + var_dump($rbbi->current(), $rbbi->getRuleStatusVec()); +} while ($rbbi->next() != IntlBreakIterator::DONE); + +?> +==DONE== +--EXPECT-- +int(0) +array(1) { + [0]=> + int(0) +} +int(12) +array(2) { + [0]=> + int(1) + [1]=> + int(4) +} +int(16) +array(1) { + [0]=> + int(42) +} +int(19) +array(1) { + [0]=> + int(4) +} +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/rbbiter_getRuleStatus_basic.phpt b/ext/intl/tests/rbbiter_getRuleStatus_basic.phpt new file mode 100644 index 0000000000..6199fdee7c --- /dev/null +++ b/ext/intl/tests/rbbiter_getRuleStatus_basic.phpt @@ -0,0 +1,42 @@ +--TEST-- +IntlRuleBasedBreakIterator::getRuleStatus(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$rules = <<<RULES +\$LN = [[:letter:] [:number:]]; +\$S = [.;,:]; + +!!forward; +\$LN+ {1}; +\$S+ {42}; +!!reverse; +\$LN+ {1}; +\$S+ {42}; +!!safe_forward; +!!safe_reverse; +RULES; +$rbbi = new IntlRuleBasedBreakIterator($rules); +$rbbi->setText('sdfkjsdf88á.... ,;'); + +do { + echo "pos : {$rbbi->current()}\n", + "rule status: {$rbbi->getRuleStatus()}\n"; +} while ($rbbi->next() != IntlBreakIterator::DONE); + +?> +==DONE== +--EXPECT-- +pos : 0 +rule status: 0 +pos : 12 +rule status: 1 +pos : 16 +rule status: 42 +pos : 17 +rule status: 0 +pos : 19 +rule status: 42 +==DONE==
\ No newline at end of file diff --git a/ext/intl/tests/rbbiter_getRules_basic.phpt b/ext/intl/tests/rbbiter_getRules_basic.phpt new file mode 100644 index 0000000000..e115e9b9e2 --- /dev/null +++ b/ext/intl/tests/rbbiter_getRules_basic.phpt @@ -0,0 +1,28 @@ +--TEST-- +IntlRuleBasedBreakIterator::getRules(): basic test +--FILE-- +<?php +ini_set("intl.error_level", E_WARNING); +ini_set("intl.default_locale", "pt_PT"); + +$rules = <<<RULES +\$LN = [[:letter:] [:number:]]; +\$S = [.;,:]; + +!!forward; +\$LN+ {1}; +\$S+ {42}; +!!reverse; +\$LN+ {1}; +\$S+ {42}; +!!safe_forward; +!!safe_reverse; +RULES; +$rbbi = new IntlRuleBasedBreakIterator($rules); +var_dump($rbbi->getRules()); + +?> +==DONE== +--EXPECT-- +string(128) "$LN = [[:letter:] [:number:]];$S = [.;,:];!!forward;$LN+ {1};$S+ {42};!!reverse;$LN+ {1};$S+ {42};!!safe_forward;!!safe_reverse;" +==DONE==
\ No newline at end of file diff --git a/ext/intl/transliterator/transliterator.c b/ext/intl/transliterator/transliterator.c index 75c9eaabda..8ee49e1e51 100644 --- a/ext/intl/transliterator/transliterator.c +++ b/ext/intl/transliterator/transliterator.c @@ -49,85 +49,6 @@ void transliterator_register_constants( INIT_FUNC_ARGS ) } /* }}} */ -/* {{{ transliterator_parse_error_to_string - * Transforms parse errors in strings. - */ -smart_str transliterator_parse_error_to_string( UParseError* pe ) -{ - smart_str ret = {0}; - char *buf; - int u8len; - UErrorCode status; - int any = 0; - - assert( pe != NULL ); - - smart_str_appends( &ret, "parse error " ); - if( pe->line > 0 ) - { - smart_str_appends( &ret, "on line " ); - smart_str_append_long( &ret, (long ) pe->line ); - any = 1; - } - if( pe->offset >= 0 ) { - if( any ) - smart_str_appends( &ret, ", " ); - else - smart_str_appends( &ret, "at " ); - - smart_str_appends( &ret, "offset " ); - smart_str_append_long( &ret, (long ) pe->offset ); - any = 1; - } - - if (pe->preContext[0] != 0 ) { - if( any ) - smart_str_appends( &ret, ", " ); - - smart_str_appends( &ret, "after \"" ); - intl_convert_utf16_to_utf8( &buf, &u8len, pe->preContext, -1, &status ); - if( U_FAILURE( status ) ) - { - smart_str_appends( &ret, "(could not convert parser error pre-context to UTF-8)" ); - } - else { - smart_str_appendl( &ret, buf, u8len ); - efree( buf ); - } - smart_str_appends( &ret, "\"" ); - any = 1; - } - - if( pe->postContext[0] != 0 ) - { - if( any ) - smart_str_appends( &ret, ", " ); - - smart_str_appends( &ret, "before or at \"" ); - intl_convert_utf16_to_utf8( &buf, &u8len, pe->postContext, -1, &status ); - if( U_FAILURE( status ) ) - { - smart_str_appends( &ret, "(could not convert parser error post-context to UTF-8)" ); - } - else - { - smart_str_appendl( &ret, buf, u8len ); - efree( buf ); - } - smart_str_appends( &ret, "\"" ); - any = 1; - } - - if( !any ) - { - smart_str_free( &ret ); - smart_str_appends( &ret, "no parse 
error" ); - } - - smart_str_0( &ret ); - return ret; -} - /* * Local variables: * tab-width: 4 diff --git a/ext/intl/transliterator/transliterator_methods.c b/ext/intl/transliterator/transliterator_methods.c index d0cfb9790d..1aa39c54b9 100644 --- a/ext/intl/transliterator/transliterator_methods.c +++ b/ext/intl/transliterator/transliterator_methods.c @@ -183,7 +183,7 @@ PHP_FUNCTION( transliterator_create_from_rules ) { char *msg = NULL; smart_str parse_error_str; - parse_error_str = transliterator_parse_error_to_string( &parse_error ); + parse_error_str = intl_parse_error_to_string( &parse_error ); spprintf( &msg, 0, "transliterator_create_from_rules: unable to " "create ICU transliterator from rules (%s)", parse_error_str.c ); smart_str_free( &parse_error_str ); |