summary | refs | log | tree | commit | diff
path: root/ext/intl/breakiterator
diff options
context:
space:
mode:
Diffstat (limited to 'ext/intl/breakiterator')
-rw-r--r-- ext/intl/breakiterator/breakiterator_class.cpp 397
-rw-r--r-- ext/intl/breakiterator/breakiterator_class.h 71
-rw-r--r-- ext/intl/breakiterator/breakiterator_iterators.cpp 346
-rw-r--r-- ext/intl/breakiterator/breakiterator_iterators.h 42
-rw-r--r-- ext/intl/breakiterator/breakiterator_methods.cpp 452
-rw-r--r-- ext/intl/breakiterator/breakiterator_methods.h 64
-rw-r--r-- ext/intl/breakiterator/codepointiterator_internal.cpp 291
-rw-r--r-- ext/intl/breakiterator/codepointiterator_internal.h 98
-rw-r--r-- ext/intl/breakiterator/codepointiterator_methods.cpp 44
-rw-r--r-- ext/intl/breakiterator/codepointiterator_methods.h 24
-rw-r--r-- ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp 221
-rw-r--r-- ext/intl/breakiterator/rulebasedbreakiterator_methods.h 32
12 files changed, 2082 insertions, 0 deletions
diff --git a/ext/intl/breakiterator/breakiterator_class.cpp b/ext/intl/breakiterator/breakiterator_class.cpp
new file mode 100644
index 0000000000..7bf271a344
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_class.cpp
@@ -0,0 +1,397 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <unicode/brkiter.h>
+#include <unicode/rbbi.h>
+#include "codepointiterator_internal.h"
+
+#include "breakiterator_iterators.h"
+
+#include <typeinfo>
+
+extern "C" {
+#define USE_BREAKITERATOR_POINTER 1
+#include "breakiterator_class.h"
+#include "breakiterator_methods.h"
+#include "rulebasedbreakiterator_methods.h"
+#include "codepointiterator_methods.h"
+#include <zend_exceptions.h>
+#include <zend_interfaces.h>
+#include <assert.h>
+}
+
+using PHP::CodePointBreakIterator;
+
+/* {{{ Global variables */
+zend_class_entry *BreakIterator_ce_ptr;
+zend_class_entry *RuleBasedBreakIterator_ce_ptr;
+zend_class_entry *CodePointBreakIterator_ce_ptr;
+zend_object_handlers BreakIterator_handlers;
+/* }}} */
+
+/* Initialise `object` as the PHP wrapper class matching the dynamic ICU type
+ * of `biter`, then attach `biter` to it.
+ *
+ * The wrapper class is chosen by comparing ICU class IDs: rule-based and
+ * code-point iterators get their dedicated classes, anything else falls back
+ * to the base BreakIterator class entry. */
+U_CFUNC void breakiterator_object_create(zval *object,
+                                         BreakIterator *biter TSRMLS_DC)
+{
+    /* Base class unless one of the more specific IDs matches below. */
+    zend_class_entry *wrapper_ce = BreakIterator_ce_ptr;
+    const UClassID dynamic_id = biter->getDynamicClassID();
+
+    if (dynamic_id == RuleBasedBreakIterator::getStaticClassID()) {
+        wrapper_ce = RuleBasedBreakIterator_ce_ptr;
+    } else if (dynamic_id == CodePointBreakIterator::getStaticClassID()) {
+        wrapper_ce = CodePointBreakIterator_ce_ptr;
+    }
+
+    object_init_ex(object, wrapper_ce);
+    breakiterator_object_construct(object, biter TSRMLS_CC);
+}
+
+U_CFUNC void breakiterator_object_construct(zval *object,
+ BreakIterator *biter TSRMLS_DC)
+{ /* Stores `biter` in the BreakIterator_object that backs `object`. */
+ BreakIterator_object *bio;
+
+ BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; /* populates bio from object; expands to INTL_METHOD_FETCH_OBJECT, defined elsewhere */
+ assert(bio->biter == NULL); /* the wrapper must not already hold an iterator */
+ bio->biter = biter; /* the object's free handler in this file later deletes it */
+}
+
+/* {{{ compare handler for BreakIterator
+ * Yields 0 when both wrappers are considered equal and 1 otherwise; no
+ * ordering is expressed. If either wrapper has no ICU iterator the raw
+ * pointers are compared, so two unconstructed objects compare equal.
+ * Otherwise the ICU iterators' operator== decides. */
+static int BreakIterator_compare_objects(zval *object1,
+                                         zval *object2 TSRMLS_DC)
+{
+    const BreakIterator_object *lhs =
+        (BreakIterator_object*)zend_object_store_get_object(object1 TSRMLS_CC);
+    const BreakIterator_object *rhs =
+        (BreakIterator_object*)zend_object_store_get_object(object2 TSRMLS_CC);
+    bool same;
+
+    if (lhs->biter != NULL && rhs->biter != NULL) {
+        same = (*lhs->biter == *rhs->biter);
+    } else {
+        same = (lhs->biter == rhs->biter);
+    }
+
+    return same ? 0 : 1;
+}
+/* }}} */
+
+/* {{{ clone handler for BreakIterator
+ * Creates a new object of the same class as `object`, copies the standard
+ * object members, and gives it its own ICU iterator obtained from clone().
+ * The text zval is shared with the original (its refcount is incremented).
+ *
+ * An exception is thrown when the original has no ICU iterator or when
+ * clone() returns NULL; in both cases the freshly created object value is
+ * still returned, with its iterator left unset. */
+static zend_object_value BreakIterator_clone_obj(zval *object TSRMLS_DC)
+{
+    BreakIterator_object *bio_orig,
+                         *bio_new;
+    zend_object_value ret_val;
+    BreakIterator *new_biter;
+
+    bio_orig = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+    /* Use the BreakIterator-specific accessor, like the rest of this function. */
+    intl_errors_reset(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC);
+
+    ret_val = BreakIterator_ce_ptr->create_object(Z_OBJCE_P(object) TSRMLS_CC);
+    bio_new = (BreakIterator_object*)zend_object_store_get_object_by_handle(
+        ret_val.handle TSRMLS_CC);
+
+    zend_objects_clone_members(&bio_new->zo, ret_val,
+        &bio_orig->zo, Z_OBJ_HANDLE_P(object) TSRMLS_CC);
+
+    if (bio_orig->biter == NULL) {
+        zend_throw_exception(NULL, "Cannot clone unconstructed BreakIterator", 0 TSRMLS_CC);
+        return ret_val;
+    }
+
+    new_biter = bio_orig->biter->clone();
+    if (!new_biter) {
+        char *err_msg;
+
+        intl_errors_set_code(BREAKITER_ERROR_P(bio_orig),
+            U_MEMORY_ALLOCATION_ERROR TSRMLS_CC);
+        intl_errors_set_custom_msg(BREAKITER_ERROR_P(bio_orig),
+            "Could not clone BreakIterator", 0 TSRMLS_CC);
+        err_msg = intl_error_get_message(BREAKITER_ERROR_P(bio_orig) TSRMLS_CC);
+        zend_throw_exception(NULL, err_msg, 0 TSRMLS_CC);
+        efree(err_msg);
+        return ret_val;
+    }
+
+    bio_new->biter = new_biter;
+    bio_new->text = bio_orig->text;
+    if (bio_new->text) {
+        /* Both wrappers now reference the same text zval. */
+        zval_add_ref(&bio_new->text);
+    }
+
+    return ret_val;
+}
+/* }}} */
+
+/* {{{ get_debug_info handler for BreakIterator
+ * Builds a temporary array (hence *is_temp = 1) describing the object:
+ *   "valid" - whether an ICU iterator is attached;
+ *   "text"  - the current text zval, or null (only when valid);
+ *   "type"  - typeid name of the ICU iterator (only when valid). */
+static HashTable *BreakIterator_get_debug_info(zval *object, int *is_temp TSRMLS_DC)
+{
+    zval info = zval_used_for_init;
+    BreakIterator_object *bio;
+    const BreakIterator *icu_iter;
+
+    *is_temp = 1;
+    array_init_size(&info, 8);
+
+    bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+    icu_iter = bio->biter;
+
+    add_assoc_bool_ex(&info, "valid", sizeof("valid"), icu_iter != NULL);
+
+    if (icu_iter != NULL) {
+        if (bio->text != NULL) {
+            /* The array takes its own reference to the text. */
+            zval_add_ref(&bio->text);
+            add_assoc_zval_ex(&info, "text", sizeof("text"), bio->text);
+        } else {
+            add_assoc_null_ex(&info, "text", sizeof("text"));
+        }
+
+        add_assoc_string_ex(&info, "type", sizeof("type"),
+            const_cast<char*>(typeid(*icu_iter).name()), 1);
+    }
+
+    return Z_ARRVAL(info);
+}
+/* }}} */
+
+/* {{{ void breakiterator_object_init(BreakIterator_object* bio)
+ * Puts the BreakIterator-specific fields into their empty state: no ICU
+ * iterator, no text, and a freshly initialised error structure. The embedded
+ * zend_object is not touched here.
+ */
+static void breakiterator_object_init(BreakIterator_object *bio TSRMLS_DC)
+{
+    bio->text = NULL;
+    bio->biter = NULL;
+    intl_error_init(BREAKITER_ERROR_P(bio) TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ BreakIterator_objects_dtor
+ * Object-store destructor callback; simply forwards to the engine's
+ * zend_objects_destroy_object(). */
+static void BreakIterator_objects_dtor(void *object,
+                                       zend_object_handle handle TSRMLS_DC)
+{
+    zend_object *zobj = static_cast<zend_object*>(object);
+
+    zend_objects_destroy_object(zobj, handle TSRMLS_CC);
+}
+/* }}} */
+
+/* {{{ BreakIterator_objects_free
+ * Object-store free callback: releases the text reference, deletes the ICU
+ * iterator, resets the error structure, tears down the standard object part
+ * and finally frees the storage itself. */
+static void BreakIterator_objects_free(zend_object *object TSRMLS_DC)
+{
+    BreakIterator_object *self = reinterpret_cast<BreakIterator_object*>(object);
+
+    if (self->text != NULL) {
+        zval_ptr_dtor(&self->text);
+    }
+
+    /* Deleting a null pointer is a no-op, so no guard is needed. */
+    delete self->biter;
+    self->biter = NULL;
+
+    intl_error_reset(BREAKITER_ERROR_P(self) TSRMLS_CC);
+    zend_object_std_dtor(&self->zo TSRMLS_CC);
+    efree(self);
+}
+/* }}} */
+
+/* {{{ BreakIterator_object_create
+ * create_object handler: allocates a zeroed BreakIterator_object, sets up the
+ * standard object part and default properties for `ce`, resets the
+ * BreakIterator-specific fields and registers the storage with the object
+ * store using this file's dtor/free callbacks. */
+static zend_object_value BreakIterator_object_create(zend_class_entry *ce TSRMLS_DC)
+{
+    zend_object_value result;
+    BreakIterator_object *storage =
+        (BreakIterator_object*)ecalloc(1, sizeof(BreakIterator_object));
+
+    zend_object_std_init(&storage->zo, ce TSRMLS_CC);
+#if PHP_VERSION_ID < 50399
+    zend_hash_copy(storage->zo.properties, &(ce->default_properties),
+        (copy_ctor_func_t) zval_add_ref, NULL, sizeof(zval*));
+#else
+    object_properties_init((zend_object*) storage, ce);
+#endif
+    breakiterator_object_init(storage TSRMLS_CC);
+
+    result.handlers = &BreakIterator_handlers;
+    result.handle = zend_objects_store_put(
+        storage,
+        BreakIterator_objects_dtor,
+        (zend_objects_free_object_storage_t) BreakIterator_objects_free,
+        NULL TSRMLS_CC);
+
+    return result;
+}
+/* }}} */
+
+/* {{{ BreakIterator/RuleBasedBreakIterator methods arguments info */
+
+/* Argument descriptors for the BreakIterator family of methods.
+ *
+ * ZEND_ARG_INFO stringifies its second argument, so parameter names must be
+ * written as bare identifiers; passing a quoted string would register a name
+ * that itself contains the quote characters.
+ *
+ * The last argument of ZEND_BEGIN_ARG_INFO_EX is the number of required
+ * parameters. */
+
+/* No parameters. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_void, 0, 0, 0)
+ZEND_END_ARG_INFO()
+
+/* Optional locale. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_locale, 0, 0, 0)
+    ZEND_ARG_INFO(0, locale)
+ZEND_END_ARG_INFO()
+
+/* Required text. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_setText, 0, 0, 1)
+    ZEND_ARG_INFO(0, text)
+ZEND_END_ARG_INFO()
+
+/* Optional offset. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_next, 0, 0, 0)
+    ZEND_ARG_INFO(0, offset)
+ZEND_END_ARG_INFO()
+
+/* Required offset. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_offset, 0, 0, 1)
+    ZEND_ARG_INFO(0, offset)
+ZEND_END_ARG_INFO()
+
+/* Required locale type. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_get_locale, 0, 0, 1)
+    ZEND_ARG_INFO(0, locale_type)
+ZEND_END_ARG_INFO()
+
+/* Optional key type. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_biter_getPartsIterator, 0, 0, 0)
+    ZEND_ARG_INFO(0, key_type)
+ZEND_END_ARG_INFO()
+
+/* Required rules, optional areCompiled flag. */
+ZEND_BEGIN_ARG_INFO_EX(ainfo_rbbi___construct, 0, 0, 1)
+    ZEND_ARG_INFO(0, rules)
+    ZEND_ARG_INFO(0, areCompiled)
+ZEND_END_ARG_INFO()
+
+/* }}} */
+
+/* {{{ BreakIterator_class_functions
+ * Every 'BreakIterator' class method has an entry in this table.
+ *
+ * The table is registered in this file under the userland class name
+ * "IntlBreakIterator". The constructor is declared private; instances are
+ * obtained through the static create*Instance() entries. The remaining
+ * entries are public instance methods, each mapped to a breakiter_* function.
+ */
+static const zend_function_entry BreakIterator_class_functions[] = {
+ PHP_ME(BreakIterator, __construct, ainfo_biter_void, ZEND_ACC_PRIVATE)
+ PHP_ME_MAPPING(createWordInstance, breakiter_create_word_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(createLineInstance, breakiter_create_line_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(createCharacterInstance, breakiter_create_character_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(createSentenceInstance, breakiter_create_sentence_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(createTitleInstance, breakiter_create_title_instance, ainfo_biter_locale, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(createCodePointInstance, breakiter_create_code_point_instance, ainfo_biter_void, ZEND_ACC_STATIC | ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getText, breakiter_get_text, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(setText, breakiter_set_text, ainfo_biter_setText, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(first, breakiter_first, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(last, breakiter_last, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(previous, breakiter_previous, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(next, breakiter_next, ainfo_biter_next, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(current, breakiter_current, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(following, breakiter_following, ainfo_biter_offset, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(preceding, breakiter_preceding, ainfo_biter_offset, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(isBoundary, breakiter_is_boundary, ainfo_biter_offset, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getLocale, breakiter_get_locale, ainfo_biter_get_locale, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getPartsIterator, breakiter_get_parts_iterator, ainfo_biter_getPartsIterator, ZEND_ACC_PUBLIC)
+
+ PHP_ME_MAPPING(getErrorCode, breakiter_get_error_code, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getErrorMessage, breakiter_get_error_message, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_FE_END
+};
+/* }}} */
+
+/* {{{ RuleBasedBreakIterator_class_functions
+ * Methods added by the rule-based subclass, which this file registers as
+ * "IntlRuleBasedBreakIterator". Unlike the base class, its constructor is
+ * public. getBinaryRules is only compiled in when the ICU version macros
+ * evaluate to 48 or more (major * 10 + minor).
+ */
+static const zend_function_entry RuleBasedBreakIterator_class_functions[] = {
+ PHP_ME(IntlRuleBasedBreakIterator, __construct, ainfo_rbbi___construct, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getRules, rbbi_get_rules, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getRuleStatus, rbbi_get_rule_status, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_ME_MAPPING(getRuleStatusVec, rbbi_get_rule_status_vec, ainfo_biter_void, ZEND_ACC_PUBLIC)
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
+ PHP_ME_MAPPING(getBinaryRules, rbbi_get_binary_rules, ainfo_biter_void, ZEND_ACC_PUBLIC)
+#endif
+ PHP_FE_END
+};
+/* }}} */
+
+/* {{{ CodePointBreakIterator_class_functions
+ * The single method added by the code-point subclass, which this file
+ * registers as "IntlCodePointBreakIterator". It declares no constructor of
+ * its own.
+ */
+static const zend_function_entry CodePointBreakIterator_class_functions[] = {
+ PHP_ME_MAPPING(getLastCodePoint, cpbi_get_last_code_point, ainfo_biter_void, ZEND_ACC_PUBLIC)
+ PHP_FE_END
+};
+/* }}} */
+
+
+/* {{{ breakiterator_register_BreakIterator_class
+ * Registers the IntlBreakIterator base class together with its object
+ * handlers, Traversable support and class constants, and then the
+ * IntlRuleBasedBreakIterator and IntlCodePointBreakIterator subclasses.
+ */
+U_CFUNC void breakiterator_register_BreakIterator_class(TSRMLS_D)
+{
+    zend_class_entry ce;
+
+    /* Base class: IntlBreakIterator. */
+    INIT_CLASS_ENTRY(ce, "IntlBreakIterator", BreakIterator_class_functions);
+    ce.create_object = BreakIterator_object_create;
+    ce.get_iterator = _breakiterator_get_iterator;
+    BreakIterator_ce_ptr = zend_register_internal_class(&ce TSRMLS_CC);
+
+    /* Start from the standard handlers, then plug in the custom ones. */
+    memcpy(&BreakIterator_handlers, zend_get_std_object_handlers(),
+        sizeof(BreakIterator_handlers));
+    BreakIterator_handlers.compare_objects = BreakIterator_compare_objects;
+    BreakIterator_handlers.clone_obj = BreakIterator_clone_obj;
+    BreakIterator_handlers.get_debug_info = BreakIterator_get_debug_info;
+
+    zend_class_implements(BreakIterator_ce_ptr TSRMLS_CC, 1,
+        zend_ce_traversable);
+
+    /* DONE comes from the C++ class rather than the C header. */
+    zend_declare_class_constant_long(BreakIterator_ce_ptr,
+        "DONE", sizeof("DONE") - 1, BreakIterator::DONE TSRMLS_CC);
+
+    /* Class constant NAME takes the value of the C header's UBRK_NAME. */
+#define DECLARE_UBRK_CONST(name) \
+    zend_declare_class_constant_long(BreakIterator_ce_ptr, #name, \
+        sizeof(#name) - 1, UBRK_ ## name TSRMLS_CC)
+
+    /* Word rule-status ranges. */
+    DECLARE_UBRK_CONST(WORD_NONE);
+    DECLARE_UBRK_CONST(WORD_NONE_LIMIT);
+    DECLARE_UBRK_CONST(WORD_NUMBER);
+    DECLARE_UBRK_CONST(WORD_NUMBER_LIMIT);
+    DECLARE_UBRK_CONST(WORD_LETTER);
+    DECLARE_UBRK_CONST(WORD_LETTER_LIMIT);
+    DECLARE_UBRK_CONST(WORD_KANA);
+    DECLARE_UBRK_CONST(WORD_KANA_LIMIT);
+    DECLARE_UBRK_CONST(WORD_IDEO);
+    DECLARE_UBRK_CONST(WORD_IDEO_LIMIT);
+
+    /* Line rule-status ranges. */
+    DECLARE_UBRK_CONST(LINE_SOFT);
+    DECLARE_UBRK_CONST(LINE_SOFT_LIMIT);
+    DECLARE_UBRK_CONST(LINE_HARD);
+    DECLARE_UBRK_CONST(LINE_HARD_LIMIT);
+
+    /* Sentence rule-status ranges. */
+    DECLARE_UBRK_CONST(SENTENCE_TERM);
+    DECLARE_UBRK_CONST(SENTENCE_TERM_LIMIT);
+    DECLARE_UBRK_CONST(SENTENCE_SEP);
+    DECLARE_UBRK_CONST(SENTENCE_SEP_LIMIT);
+
+#undef DECLARE_UBRK_CONST
+
+    /* Subclass: IntlRuleBasedBreakIterator. */
+    INIT_CLASS_ENTRY(ce, "IntlRuleBasedBreakIterator",
+        RuleBasedBreakIterator_class_functions);
+    RuleBasedBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
+        BreakIterator_ce_ptr, NULL TSRMLS_CC);
+
+    /* Subclass: IntlCodePointBreakIterator. */
+    INIT_CLASS_ENTRY(ce, "IntlCodePointBreakIterator",
+        CodePointBreakIterator_class_functions);
+    CodePointBreakIterator_ce_ptr = zend_register_internal_class_ex(&ce,
+        BreakIterator_ce_ptr, NULL TSRMLS_CC);
+}
+/* }}} */
diff --git a/ext/intl/breakiterator/breakiterator_class.h b/ext/intl/breakiterator/breakiterator_class.h
new file mode 100644
index 0000000000..cc5d51256f
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_class.h
@@ -0,0 +1,71 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef BREAKITERATOR_CLASS_H
+#define BREAKITERATOR_CLASS_H
+
+//redefinition of inline in PHP headers causes problems, so include this before
+#include <math.h>
+
+#include <php.h>
+#include "../intl_error.h"
+#include "../intl_data.h"
+
+#ifndef USE_BREAKITERATOR_POINTER
+typedef void BreakIterator;
+#endif
+
+typedef struct {
+ zend_object zo; // standard Zend object part; kept first so the struct can be cast to/from zend_object*
+
+ // error state for this object, accessed through the BREAKITER_ERROR* macros
+ intl_error err;
+
+ // ICU break iterator; typedef'd to void unless USE_BREAKITERATOR_POINTER is defined before inclusion
+ BreakIterator* biter;
+
+ // zval holding the current text (may be NULL)
+ zval *text;
+} BreakIterator_object;
+
+/* Accessors for the intl_error embedded in a BreakIterator_object.
+ * Every expansion is fully parenthesised so that it can be used inside
+ * larger expressions without precedence surprises. */
+#define BREAKITER_ERROR(bio) ((bio)->err)
+#define BREAKITER_ERROR_P(bio) (&(BREAKITER_ERROR(bio)))
+
+#define BREAKITER_ERROR_CODE(bio) INTL_ERROR_CODE(BREAKITER_ERROR(bio))
+#define BREAKITER_ERROR_CODE_P(bio) (&(INTL_ERROR_CODE(BREAKITER_ERROR(bio))))
+
+/* Method prologue helpers. They rely on the generic INTL_METHOD_* macros and
+ * on a local variable named `bio`. BREAKITER_METHOD_FETCH_OBJECT additionally
+ * records an error and makes the calling method return FALSE when no ICU
+ * iterator is attached to the object. */
+#define BREAKITER_METHOD_INIT_VARS INTL_METHOD_INIT_VARS(BreakIterator, bio)
+#define BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK INTL_METHOD_FETCH_OBJECT(BreakIterator, bio)
+#define BREAKITER_METHOD_FETCH_OBJECT \
+    BREAKITER_METHOD_FETCH_OBJECT_NO_CHECK; \
+    if (bio->biter == NULL) \
+    { \
+        intl_errors_set(BREAKITER_ERROR_P(bio), U_ILLEGAL_ARGUMENT_ERROR, "Found unconstructed BreakIterator", 0 TSRMLS_CC); \
+        RETURN_FALSE; \
+    }
+
+void breakiterator_object_create(zval *object, BreakIterator *break_iter TSRMLS_DC);
+
+void breakiterator_object_construct(zval *object, BreakIterator *break_iter TSRMLS_DC);
+
+void breakiterator_register_BreakIterator_class(TSRMLS_D);
+
+extern zend_class_entry *BreakIterator_ce_ptr,
+ *RuleBasedBreakIterator_ce_ptr;
+
+extern zend_object_handlers BreakIterator_handlers;
+
+#endif /* #ifndef BREAKITERATOR_CLASS_H */
diff --git a/ext/intl/breakiterator/breakiterator_iterators.cpp b/ext/intl/breakiterator/breakiterator_iterators.cpp
new file mode 100644
index 0000000000..d88ad8a712
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_iterators.cpp
@@ -0,0 +1,346 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <unicode/brkiter.h>
+
+#include "breakiterator_iterators.h"
+#include "../common/common_enum.h"
+
+extern "C" {
+#define USE_BREAKITERATOR_POINTER
+#include "breakiterator_class.h"
+#include "../intl_convert.h"
+#include "../locale/locale.h"
+#include <zend_exceptions.h>
+}
+
+static zend_class_entry *IntlPartsIterator_ce_ptr;
+static zend_object_handlers IntlPartsIterator_handlers;
+
+/* BreakIterator's iterator */
+
+/* Common prologue for the iterator callbacks: looks up the BreakIterator
+ * object stored in iter->data, clears its error state and returns its ICU
+ * iterator. When the object has no ICU iterator, an invalid-state error is
+ * recorded on it and NULL is returned. */
+inline BreakIterator *_breakiter_prolog(zend_object_iterator *iter TSRMLS_DC)
+{
+    const zval *wrapped = (const zval*)iter->data;
+    BreakIterator_object *bio =
+        (BreakIterator_object*)zend_object_store_get_object(wrapped TSRMLS_CC);
+
+    intl_errors_reset(BREAKITER_ERROR_P(bio) TSRMLS_CC);
+
+    if (bio->biter != NULL) {
+        return bio->biter;
+    }
+
+    intl_errors_set(BREAKITER_ERROR_P(bio), U_INVALID_STATE_ERROR,
+        "The BreakIterator object backing the PHP iterator is not "
+        "properly constructed", 0 TSRMLS_CC);
+    return NULL;
+}
+
+/* destroy_it callback: drops the reference to the BreakIterator zval that
+ * _breakiterator_get_iterator() stored in iter->data. */
+static void _breakiterator_destroy_it(zend_object_iterator *iter TSRMLS_DC)
+{
+    zval **held_object = reinterpret_cast<zval**>(&iter->data);
+
+    zval_ptr_dtor(held_object);
+}
+
+/* move_forward callback: discards the current value and, if the ICU iterator
+ * yields another boundary, stores that position as the new current value.
+ * When the iterator is missing or reports DONE, `current` stays
+ * invalidated. */
+static void _breakiterator_move_forward(zend_object_iterator *iter TSRMLS_DC)
+{
+    BreakIterator *icu_iter = _breakiter_prolog(iter TSRMLS_CC);
+    zoi_with_current *self = (zoi_with_current*)iter;
+
+    iter->funcs->invalidate_current(iter TSRMLS_CC);
+
+    if (icu_iter == NULL) {
+        return;
+    }
+
+    const int32_t boundary = icu_iter->next();
+    if (boundary == BreakIterator::DONE) {
+        /* End of the enumeration; nothing more to publish. */
+        return;
+    }
+
+    MAKE_STD_ZVAL(self->current);
+    ZVAL_LONG(self->current, (long)boundary);
+}
+
+/* rewind callback: repositions the ICU iterator on its first boundary and
+ * publishes that position as the current value.
+ *
+ * Mirrors _breakiterator_move_forward(): the previous current value is
+ * invalidated first so it is not overwritten while still referenced, and a
+ * NULL iterator from the prologue (which has then already recorded an error)
+ * leaves the PHP iterator without a current value instead of being
+ * dereferenced. */
+static void _breakiterator_rewind(zend_object_iterator *iter TSRMLS_DC)
+{
+    BreakIterator *biter = _breakiter_prolog(iter TSRMLS_CC);
+    zoi_with_current *zoi_iter = (zoi_with_current*)iter;
+
+    /* Same call move_forward makes before replacing `current`. */
+    iter->funcs->invalidate_current(iter TSRMLS_CC);
+
+    if (biter == NULL) {
+        return;
+    }
+
+    int32_t pos = biter->first();
+    MAKE_STD_ZVAL(zoi_iter->current);
+    ZVAL_LONG(zoi_iter->current, (long)pos);
+}
+
+static zend_object_iterator_funcs breakiterator_iterator_funcs = { /* callbacks for foreach over a BreakIterator; slot roles below are inferred from the handler names */
+ zoi_with_current_dtor, /* destructor */
+ zoi_with_current_valid, /* validity check */
+ zoi_with_current_get_current_data, /* current value */
+ NULL, /* no key callback supplied (the parts iterator table fills this slot) */
+ _breakiterator_move_forward, /* advance to the next boundary */
+ _breakiterator_rewind, /* restart at the first boundary */
+ zoi_with_current_invalidate_current /* drop the current value */
+};
+
+/* get_iterator handler for BreakIterator objects.
+ *
+ * Throws and returns NULL when iteration by reference is requested or when
+ * the object has no ICU iterator. Otherwise it allocates a zoi_with_current
+ * that keeps a counted reference to `object` in its data slot (released by
+ * _breakiterator_destroy_it) and starts without a current value. `ce` is not
+ * used. */
+U_CFUNC zend_object_iterator *_breakiterator_get_iterator(
+        zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC)
+{
+    if (by_ref) {
+        zend_throw_exception(NULL,
+            "Iteration by reference is not supported", 0 TSRMLS_CC);
+        return NULL;
+    }
+
+    const BreakIterator_object *bio =
+        (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+    if (bio->biter == NULL) {
+        zend_throw_exception(NULL,
+            "The BreakIterator is not properly constructed", 0 TSRMLS_CC);
+        return NULL;
+    }
+
+    zoi_with_current *it =
+        static_cast<zoi_with_current*>(emalloc(sizeof(*it)));
+
+    it->current = NULL;
+    it->wrapping_obj = NULL; /* not used; object is in zoi.data */
+    it->destroy_it = _breakiterator_destroy_it;
+    it->zoi.index = 0;
+    it->zoi.funcs = &breakiterator_iterator_funcs;
+
+    /* The iterator owns one reference to the object it walks. */
+    zval_add_ref(&object);
+    it->zoi.data = static_cast<void*>(object);
+
+    return reinterpret_cast<zend_object_iterator *>(it);
+}
+
+/* BreakIterator parts iterator */
+
+typedef struct zoi_break_iter_parts { /* iterator state for walking the text parts between boundaries */
+ zoi_with_current zoi_cur; /* base iterator; kept first so the struct can be cast to/from zend_object_iterator* */
+ parts_iter_key_type key_type; /* selects what move_forward stores in ->index (left boundary, right boundary, or nothing) */
+ BreakIterator_object *bio; /* cached object pointer, so it need not be fetched on every call */
+} zoi_break_iter_parts;
+
+/* destroy_it callback of the parts iterator: drops the reference to the
+ * BreakIterator zval kept in iter->data. */
+static void _breakiterator_parts_destroy_it(zend_object_iterator *iter TSRMLS_DC)
+{
+    zval **held_object = reinterpret_cast<zval**>(&iter->data);
+
+    zval_ptr_dtor(held_object);
+}
+
+/* get_current_key callback of the parts iterator. Keys are always integers:
+ * the value kept in iter->index, which move_forward/rewind maintain. The
+ * string-key out-parameters are left untouched. */
+static int _breakiterator_parts_get_current_key(zend_object_iterator *iter,
+                                                char **str_key,
+                                                uint *str_key_len,
+                                                ulong *int_key TSRMLS_DC)
+{
+    (void)str_key;
+    (void)str_key_len;
+
+    *int_key = iter->index;
+
+    return HASH_KEY_IS_LONG;
+}
+
+/* move_forward callback of the parts iterator.
+ *
+ * Discards the current value, then takes the ICU iterator's current position
+ * and its next boundary. If either is DONE the iterator is left without a
+ * current value. Otherwise the bytes of the text between the two positions
+ * are copied into a new string zval, which becomes the current value, and
+ * ->index is set according to the key type. */
+static void _breakiterator_parts_move_forward(zend_object_iterator *iter TSRMLS_DC)
+{
+    zoi_break_iter_parts *zoi_bit = (zoi_break_iter_parts*)iter;
+    BreakIterator_object *bio = zoi_bit->bio;
+
+    iter->funcs->invalidate_current(iter TSRMLS_CC);
+
+    const int32_t cur = bio->biter->current();
+    if (cur == BreakIterator::DONE) {
+        return;
+    }
+    const int32_t next = bio->biter->next();
+    if (next == BreakIterator::DONE) {
+        return;
+    }
+
+    if (zoi_bit->key_type == PARTS_ITERATOR_KEY_LEFT) {
+        iter->index = cur;
+    } else if (zoi_bit->key_type == PARTS_ITERATOR_KEY_RIGHT) {
+        iter->index = next;
+    }
+    /* else zoi_bit->key_type == PARTS_ITERATOR_KEY_SEQUENTIAL
+     * No need to do anything, the engine increments ->index */
+
+    /* `next` cannot be DONE here, so no end-of-text fallback is needed. */
+    const char *s = Z_STRVAL_P(bio->text);
+    const int32_t len = next - cur;
+    char *res;
+
+    assert(next <= Z_STRLEN_P(bio->text) && next >= cur);
+    res = static_cast<char*>(emalloc(len + 1));
+
+    memcpy(res, &s[cur], len);
+    res[len] = '\0';
+
+    MAKE_STD_ZVAL(zoi_bit->zoi_cur.current);
+    /* Last argument 0: the zval takes over `res` instead of copying it. */
+    ZVAL_STRINGL(zoi_bit->zoi_cur.current, res, len, 0);
+}
+
+/* rewind callback of the parts iterator: drops any current value, moves the
+ * ICU iterator back to its first boundary and then lets move_forward produce
+ * the first part. */
+static void _breakiterator_parts_rewind(zend_object_iterator *iter TSRMLS_DC)
+{
+    zoi_break_iter_parts *parts_it = (zoi_break_iter_parts*)iter;
+
+    if (parts_it->zoi_cur.current != NULL) {
+        iter->funcs->invalidate_current(iter TSRMLS_CC);
+    }
+
+    parts_it->bio->biter->first();
+    iter->funcs->move_forward(iter TSRMLS_CC);
+}
+
+static zend_object_iterator_funcs breakiterator_parts_it_funcs = { /* callbacks for the parts iterator; slot roles below are inferred from the handler names */
+ zoi_with_current_dtor, /* destructor */
+ zoi_with_current_valid, /* validity check */
+ zoi_with_current_get_current_data, /* current value (the text part) */
+ _breakiterator_parts_get_current_key, /* integer key taken from ->index */
+ _breakiterator_parts_move_forward, /* produce the next part */
+ _breakiterator_parts_rewind, /* restart from the first boundary */
+ zoi_with_current_invalidate_current /* drop the current value */
+};
+
+/* Turns `object` into an IntlPartsIterator that walks the parts of the
+ * BreakIterator in `break_iter_zv`.
+ *
+ * The new iterator holds a counted reference to `break_iter_zv` in its data
+ * slot and caches the corresponding BreakIterator_object. The BreakIterator
+ * is expected to already have an ICU iterator (checked with assert only). */
+void IntlIterator_from_BreakIterator_parts(zval *break_iter_zv,
+                                           zval *object,
+                                           parts_iter_key_type key_type TSRMLS_DC)
+{
+    IntlIterator_object *ii;
+    zoi_break_iter_parts *parts_it;
+
+    zval_add_ref(&break_iter_zv);
+
+    object_init_ex(object, IntlPartsIterator_ce_ptr);
+    ii = (IntlIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+
+    parts_it = (zoi_break_iter_parts*)emalloc(sizeof(zoi_break_iter_parts));
+    ii->iterator = (zend_object_iterator*)parts_it;
+
+    /* Generic zend_object_iterator part. */
+    parts_it->zoi_cur.zoi.data = break_iter_zv;
+    parts_it->zoi_cur.zoi.funcs = &breakiterator_parts_it_funcs;
+    parts_it->zoi_cur.zoi.index = 0;
+
+    /* zoi_with_current part. */
+    parts_it->zoi_cur.destroy_it = _breakiterator_parts_destroy_it;
+    parts_it->zoi_cur.wrapping_obj = object;
+    parts_it->zoi_cur.current = NULL;
+
+    /* Parts-iterator specific part. */
+    parts_it->bio = (BreakIterator_object*)
+        zend_object_store_get_object(break_iter_zv TSRMLS_CC);
+    assert(parts_it->bio->biter != NULL);
+    parts_it->key_type = key_type;
+}
+
+/* create_object handler for IntlPartsIterator: delegates allocation to the
+ * IntlIterator class entry's create_object and then substitutes the
+ * IntlPartsIterator handler table. */
+U_CFUNC zend_object_value IntlPartsIterator_object_create(zend_class_entry *ce TSRMLS_DC)
+{
+    zend_object_value created = IntlIterator_ce_ptr->create_object(ce TSRMLS_CC);
+
+    created.handlers = &IntlPartsIterator_handlers;
+    return created;
+}
+
+/* get_method handler for IntlPartsIterator.
+ *
+ * A call to getRuleStatus (matched case-insensitively through the lowercased
+ * key) is redirected to the BreakIterator zval kept in the iterator's data
+ * slot: *object_ptr is replaced by that zval and the lookup is delegated to
+ * its own get_method handler. Every other method, and getRuleStatus when no
+ * iterator data is available, goes through the standard handler.
+ *
+ * When the caller passes no literal key, a temporary lowercased copy of the
+ * method name is built with do_alloca and released before returning. */
+U_CFUNC zend_function *IntlPartsIterator_get_method(zval **object_ptr,
+        char *method, int method_len, const zend_literal *key TSRMLS_DC)
+{
+    zend_literal tmp_literal = {0};
+    zend_function *resolved = NULL;
+    bool forwarded = false;
+    ALLOCA_FLAG(use_heap)
+
+    if (key == NULL) {
+        char *lowered = static_cast<char*>(
+            do_alloca(method_len + 1, use_heap));
+
+        zend_str_tolower_copy(lowered, method, method_len);
+        Z_STRVAL(tmp_literal.constant) = lowered;
+        tmp_literal.hash_value = zend_hash_func(lowered, method_len + 1);
+        key = &tmp_literal;
+    }
+
+    const bool is_get_rule_status =
+        (key->hash_value & 0xFFFFFFFF) == 0xA2B486A1 /* hash of getrulestatus\0 */
+        && method_len == sizeof("getrulestatus") - 1
+        && memcmp("getrulestatus", Z_STRVAL(key->constant), method_len) == 0;
+
+    if (is_get_rule_status) {
+        IntlIterator_object *obj = (IntlIterator_object*)
+            zend_object_store_get_object(*object_ptr TSRMLS_CC);
+
+        if (obj->iterator && obj->iterator->data) {
+            zval *break_iter_zv = static_cast<zval*>(obj->iterator->data);
+
+            *object_ptr = break_iter_zv;
+            resolved = Z_OBJ_HANDLER_P(break_iter_zv, get_method)(object_ptr,
+                method, method_len, key TSRMLS_CC);
+            forwarded = true;
+        }
+    }
+
+    if (!forwarded) {
+        resolved = std_object_handlers.get_method(object_ptr,
+            method, method_len, key TSRMLS_CC);
+    }
+
+    if (key == &tmp_literal) {
+        free_alloca(Z_STRVAL(tmp_literal.constant), use_heap);
+    }
+
+    return resolved;
+}
+
+/* IntlPartsIterator::getBreakIterator()
+ * Takes no arguments and returns a copy of the zval stored in the iterator's
+ * data slot (the BreakIterator this parts iterator walks). On bad arguments
+ * a global intl error is recorded and nothing is returned. */
+U_CFUNC PHP_METHOD(IntlPartsIterator, getBreakIterator)
+{
+    INTLITERATOR_METHOD_INIT_VARS;
+
+    if (zend_parse_parameters_none() == FAILURE) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "IntlPartsIterator::getBreakIterator: bad arguments", 0 TSRMLS_CC);
+        return;
+    }
+
+    INTLITERATOR_METHOD_FETCH_OBJECT;
+
+    zval *source_zv = static_cast<zval*>(ii->iterator->data);
+
+    RETURN_ZVAL(source_zv, 1, 0);
+}
+
+ZEND_BEGIN_ARG_INFO_EX(ainfo_parts_it_void, 0, 0, 0) /* descriptor for methods taking no parameters */
+ZEND_END_ARG_INFO()
+
+static const zend_function_entry IntlPartsIterator_class_functions[] = { /* methods IntlPartsIterator adds on top of its parent class */
+ PHP_ME(IntlPartsIterator, getBreakIterator, ainfo_parts_it_void, ZEND_ACC_PUBLIC)
+ PHP_FE_END
+};
+
+/* Registers the IntlPartsIterator class as a child of IntlIterator, installs
+ * its create_object and get_method overrides, and declares the KEY_* class
+ * constants from the PARTS_ITERATOR_KEY_* enumerators. */
+U_CFUNC void breakiterator_register_IntlPartsIterator_class(TSRMLS_D)
+{
+    zend_class_entry ce;
+
+    /* Create and register 'IntlPartsIterator' class. */
+    INIT_CLASS_ENTRY(ce, "IntlPartsIterator", IntlPartsIterator_class_functions);
+    IntlPartsIterator_ce_ptr = zend_register_internal_class_ex(&ce,
+        IntlIterator_ce_ptr, NULL TSRMLS_CC);
+    IntlPartsIterator_ce_ptr->create_object = IntlPartsIterator_object_create;
+
+    /* Same handlers as IntlIterator, except for method lookup. */
+    memcpy(&IntlPartsIterator_handlers, &IntlIterator_handlers,
+        sizeof(IntlPartsIterator_handlers));
+    IntlPartsIterator_handlers.get_method = IntlPartsIterator_get_method;
+
+    /* Class constant NAME takes the value of PARTS_ITERATOR_NAME. */
+#define DECLARE_PARTS_CONST(name) \
+    zend_declare_class_constant_long(IntlPartsIterator_ce_ptr, #name, \
+        sizeof(#name) - 1, PARTS_ITERATOR_ ## name TSRMLS_CC)
+
+    DECLARE_PARTS_CONST(KEY_SEQUENTIAL);
+    DECLARE_PARTS_CONST(KEY_LEFT);
+    DECLARE_PARTS_CONST(KEY_RIGHT);
+
+#undef DECLARE_PARTS_CONST
+} \ No newline at end of file
diff --git a/ext/intl/breakiterator/breakiterator_iterators.h b/ext/intl/breakiterator/breakiterator_iterators.h
new file mode 100644
index 0000000000..7162072414
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_iterators.h
@@ -0,0 +1,42 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+*/
+#ifndef INTL_BREAKITERATOR_ITERATORS_H
+#define INTL_BREAKITERATOR_ITERATORS_H
+
+#include <unicode/umachine.h>
+
+U_CDECL_BEGIN
+#include <math.h>
+#include <php.h>
+U_CDECL_END
+
+/* Key style produced by the parts iterator. The values are spelled out
+ * because they are exported to userland as IntlPartsIterator::KEY_*
+ * constants. No trailing comma after the last enumerator: C89 and C++03
+ * reject it. */
+typedef enum {
+    PARTS_ITERATOR_KEY_SEQUENTIAL = 0, /* index left to the engine's counter */
+    PARTS_ITERATOR_KEY_LEFT = 1,       /* index is the part's left boundary */
+    PARTS_ITERATOR_KEY_RIGHT = 2       /* index is the part's right boundary */
+} parts_iter_key_type;
+
+#ifdef __cplusplus
+void IntlIterator_from_BreakIterator_parts(zval *break_iter_zv,
+ zval *object,
+ parts_iter_key_type key_type TSRMLS_DC);
+#endif
+
+U_CFUNC zend_object_iterator *_breakiterator_get_iterator(
+ zend_class_entry *ce, zval *object, int by_ref TSRMLS_DC);
+U_CFUNC void breakiterator_register_IntlPartsIterator_class(TSRMLS_D);
+
+#endif \ No newline at end of file
diff --git a/ext/intl/breakiterator/breakiterator_methods.cpp b/ext/intl/breakiterator/breakiterator_methods.cpp
new file mode 100644
index 0000000000..6a61f8cb93
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_methods.cpp
@@ -0,0 +1,452 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <unicode/brkiter.h>
+#include "codepointiterator_internal.h"
+
+#include "breakiterator_iterators.h"
+
+extern "C" {
+#include "../php_intl.h"
+#define USE_BREAKITERATOR_POINTER 1
+#include "breakiterator_class.h"
+#include "../locale/locale.h"
+#include <zend_exceptions.h>
+}
+
+using PHP::CodePointBreakIterator;
+
+/* BreakIterator::__construct()
+ * Does nothing except throw: the call passes a NULL class entry, a fixed
+ * message and code 0 to zend_throw_exception(). */
+U_CFUNC PHP_METHOD(BreakIterator, __construct)
+{
+	zend_throw_exception(NULL,
+		"An object of this type cannot be created with the new operator",
+		0 TSRMLS_CC);
+}
+
+/* Shared body of the breakiter_create_*_instance() functions.
+ *
+ * func_name  prefix used in the error messages
+ * func       ICU factory taking a Locale and an in/out UErrorCode
+ *
+ * Parses one nullable string argument (the locale); a NULL locale is replaced
+ * by intl_locale_get_default(). On bad arguments or when ICU reports a
+ * failure, the global intl error is set and NULL is returned; otherwise the
+ * new iterator is handed to breakiterator_object_create(). */
+static void _breakiter_factory(const char *func_name,
+		BreakIterator *(*func)(const Locale&, UErrorCode&),
+		INTERNAL_FUNCTION_PARAMETERS)
+{
+	const char	*locale_name = NULL;
+	int			locale_name_len; /* required by zpp, otherwise unused */
+	char		*err_msg;
+	UErrorCode	icu_status = UErrorCode();
+
+	intl_error_reset(NULL TSRMLS_CC);
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s!",
+			&locale_name, &locale_name_len) == FAILURE) {
+		spprintf(&err_msg, NULL, "%s: bad arguments", func_name);
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, err_msg, 1 TSRMLS_CC);
+		efree(err_msg);
+		RETURN_NULL();
+	}
+
+	if (locale_name == NULL) {
+		locale_name = intl_locale_get_default(TSRMLS_C);
+	}
+
+	BreakIterator *created =
+		func(Locale::createFromName(locale_name), icu_status);
+
+	/* the status is published even on success */
+	intl_error_set_code(NULL, icu_status TSRMLS_CC);
+	if (U_FAILURE(icu_status)) {
+		spprintf(&err_msg, NULL, "%s: error creating BreakIterator",
+			func_name);
+		intl_error_set_custom_msg(NULL, err_msg, 1 TSRMLS_CC);
+		efree(err_msg);
+		RETURN_NULL();
+	}
+
+	breakiterator_object_create(return_value, created TSRMLS_CC);
+}
+
+/* Delegates to _breakiter_factory() with BreakIterator::createWordInstance. */
+U_CFUNC PHP_FUNCTION(breakiter_create_word_instance)
+{
+	BreakIterator *(*const icu_factory)(const Locale&, UErrorCode&) =
+		&BreakIterator::createWordInstance;
+
+	_breakiter_factory("breakiter_create_word_instance", icu_factory,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Delegates to _breakiter_factory() with BreakIterator::createLineInstance. */
+U_CFUNC PHP_FUNCTION(breakiter_create_line_instance)
+{
+	BreakIterator *(*const icu_factory)(const Locale&, UErrorCode&) =
+		&BreakIterator::createLineInstance;
+
+	_breakiter_factory("breakiter_create_line_instance", icu_factory,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Delegates to _breakiter_factory() with
+ * BreakIterator::createCharacterInstance. */
+U_CFUNC PHP_FUNCTION(breakiter_create_character_instance)
+{
+	BreakIterator *(*const icu_factory)(const Locale&, UErrorCode&) =
+		&BreakIterator::createCharacterInstance;
+
+	_breakiter_factory("breakiter_create_character_instance", icu_factory,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Delegates to _breakiter_factory() with
+ * BreakIterator::createSentenceInstance. */
+U_CFUNC PHP_FUNCTION(breakiter_create_sentence_instance)
+{
+	BreakIterator *(*const icu_factory)(const Locale&, UErrorCode&) =
+		&BreakIterator::createSentenceInstance;
+
+	_breakiter_factory("breakiter_create_sentence_instance", icu_factory,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Delegates to _breakiter_factory() with BreakIterator::createTitleInstance. */
+U_CFUNC PHP_FUNCTION(breakiter_create_title_instance)
+{
+	BreakIterator *(*const icu_factory)(const Locale&, UErrorCode&) =
+		&BreakIterator::createTitleInstance;
+
+	_breakiter_factory("breakiter_create_title_instance", icu_factory,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Creates a PHP::CodePointBreakIterator and wraps it with
+ * breakiterator_object_create(). Takes no arguments; when any are passed the
+ * global intl error is set and NULL is returned.
+ * No UErrorCode is involved: the constructor used here takes none. */
+U_CFUNC PHP_FUNCTION(breakiter_create_code_point_instance)
+{
+	intl_error_reset(NULL TSRMLS_CC);
+
+	if (zend_parse_parameters_none() == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_create_code_point_instance: bad arguments", 0 TSRMLS_CC);
+		RETURN_NULL();
+	}
+
+	CodePointBreakIterator *cpbi = new CodePointBreakIterator();
+	breakiterator_object_create(return_value, cpbi TSRMLS_CC);
+}
+
+/* Returns a copy of the zval stored in bio->text (the one kept by
+ * breakiter_set_text()), or NULL when none is stored. Returns FALSE on bad
+ * arguments. */
+U_CFUNC PHP_FUNCTION(breakiter_get_text)
+{
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters_none() == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_get_text: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	if (bio->text != NULL) {
+		/* copy the zval, do not destroy the stored one */
+		RETURN_ZVAL(bio->text, 1, 0);
+	}
+
+	RETURN_NULL();
+}
+
+/* Sets the text the wrapped BreakIterator operates on.
+ *
+ * Takes one string argument, which is opened as a UTF-8 UText and passed to
+ * BreakIterator::setText(). Returns TRUE on success and FALSE on bad
+ * arguments. ICU failures go through INTL_METHOD_CHECK_STATUS, the same
+ * macro breakiter_get_locale() uses, rather than the constructor-oriented
+ * INTL_CTOR_CHECK_STATUS, so every failure path of this method reports the
+ * same way. */
+U_CFUNC PHP_FUNCTION(breakiter_set_text)
+{
+	char	*text;
+	int		text_len;
+	UText	*ut = NULL;
+	zval	**textzv;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s",
+			&text, &text_len) == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_set_text: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	/* also fetch the argument as a zval so a reference to it can be kept */
+	int res = zend_get_parameters_ex(1, &textzv);
+	assert(res == SUCCESS);
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	/* assert it's safe to use text and text_len: zpp changes the arguments
+	 * in place on the stack, so text must be the buffer owned by *textzv */
+	assert(text == Z_STRVAL_PP(textzv));
+
+	ut = utext_openUTF8(ut, text, text_len, BREAKITER_ERROR_CODE_P(bio));
+	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error opening UText");
+
+	bio->biter->setText(ut, BREAKITER_ERROR_CODE(bio));
+	utext_close(ut); /* ICU shallow clones the UText */
+	INTL_METHOD_CHECK_STATUS(bio, "breakiter_set_text: error calling "
+		"BreakIterator::setText()");
+
+	/* When ICU clones the UText, it does not copy the buffer, so we have to
+	 * keep the string buffer around by holding a reference to its zval. This
+	 * also allows a faster implementation of getText() */
+	if (bio->text != NULL) {
+		zval_ptr_dtor(&bio->text);
+	}
+	bio->text = *textzv;
+	zval_add_ref(&bio->text);
+
+	RETURN_TRUE;
+}
+
+/* Shared body for methods that take no PHP arguments and forward to a
+ * BreakIterator member function returning int32_t.
+ *
+ * func_name  prefix used in the error message
+ * func       member function invoked on bio->biter
+ *
+ * Returns the member function's result as a PHP integer, or FALSE when
+ * arguments were passed. */
+static void _breakiter_no_args_ret_int32(
+		const char *func_name,
+		int32_t (BreakIterator::*func)(),
+		INTERNAL_FUNCTION_PARAMETERS)
+{
+	char *err_msg;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters_none() == FAILURE) {
+		spprintf(&err_msg, NULL, "%s: bad arguments", func_name);
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, err_msg, 1 TSRMLS_CC);
+		efree(err_msg);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	const int32_t icu_result = (bio->biter->*func)();
+
+	RETURN_LONG((long)icu_result);
+}
+
+/* Shared body for methods that take one integer argument and forward it to a
+ * BreakIterator member function of type int32_t (int32_t).
+ *
+ * func_name  prefix used in the error messages
+ * func       member function invoked on bio->biter
+ *
+ * Returns the member function's result as a PHP integer. Returns FALSE when
+ * the argument cannot be parsed or does not fit in an int32_t. */
+static void _breakiter_int32_ret_int32(
+		const char *func_name,
+		int32_t (BreakIterator::*func)(int32_t),
+		INTERNAL_FUNCTION_PARAMETERS)
+{
+	char	*err_msg;
+	long	php_arg;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &php_arg) == FAILURE) {
+		spprintf(&err_msg, NULL, "%s: bad arguments", func_name);
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, err_msg, 1 TSRMLS_CC);
+		efree(err_msg);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	/* a PHP long may be wider than the int32_t ICU expects */
+	if (!(php_arg >= INT32_MIN && php_arg <= INT32_MAX)) {
+		spprintf(&err_msg, NULL, "%s: offset argument is outside bounds of "
+			"a 32-bit wide integer", func_name);
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, err_msg, 1 TSRMLS_CC);
+		efree(err_msg);
+		RETURN_FALSE;
+	}
+
+	const int32_t icu_result = (bio->biter->*func)((int32_t)php_arg);
+
+	RETURN_LONG((long)icu_result);
+}
+
+/* Forwards to BreakIterator::first() via _breakiter_no_args_ret_int32(). */
+U_CFUNC PHP_FUNCTION(breakiter_first)
+{
+	int32_t (BreakIterator::*const icu_method)() = &BreakIterator::first;
+
+	_breakiter_no_args_ret_int32("breakiter_first", icu_method,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Forwards to BreakIterator::last() via _breakiter_no_args_ret_int32(). */
+U_CFUNC PHP_FUNCTION(breakiter_last)
+{
+	int32_t (BreakIterator::*const icu_method)() = &BreakIterator::last;
+
+	_breakiter_no_args_ret_int32("breakiter_last", icu_method,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Forwards to BreakIterator::previous() via _breakiter_no_args_ret_int32(). */
+U_CFUNC PHP_FUNCTION(breakiter_previous)
+{
+	int32_t (BreakIterator::*const icu_method)() = &BreakIterator::previous;
+
+	_breakiter_no_args_ret_int32("breakiter_previous", icu_method,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Dispatches to one of the two BreakIterator::next() overloads.
+ *
+ * With no argument, or with a single NULL argument, the no-argument overload
+ * is used; in every other case the call goes to the int32_t overload, whose
+ * shared body does the argument parsing and error reporting. */
+U_CFUNC PHP_FUNCTION(breakiter_next)
+{
+	bool use_no_arg_overload = (ZEND_NUM_ARGS() == 0);
+
+	if (ZEND_NUM_ARGS() == 1) {
+		zval **first_arg;
+		int fetch_res = zend_get_parameters_ex(1, &first_arg);
+		assert(fetch_res == SUCCESS);
+		if (Z_TYPE_PP(first_arg) == IS_NULL) {
+			use_no_arg_overload = true;
+			ht = 0; /* pretend we don't have any argument */
+		}
+	}
+
+	if (!use_no_arg_overload) {
+		_breakiter_int32_ret_int32("breakiter_next",
+			&BreakIterator::next,
+			INTERNAL_FUNCTION_PARAM_PASSTHRU);
+		return;
+	}
+
+	_breakiter_no_args_ret_int32("breakiter_next",
+		&BreakIterator::next,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Returns BreakIterator::current() as a PHP integer; FALSE on bad
+ * arguments. */
+U_CFUNC PHP_FUNCTION(breakiter_current)
+{
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters_none() == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_current: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	const int32_t position = bio->biter->current();
+
+	RETURN_LONG((long)position);
+}
+
+/* Forwards to BreakIterator::following() via _breakiter_int32_ret_int32(). */
+U_CFUNC PHP_FUNCTION(breakiter_following)
+{
+	int32_t (BreakIterator::*const icu_method)(int32_t) =
+		&BreakIterator::following;
+
+	_breakiter_int32_ret_int32("breakiter_following", icu_method,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Forwards to BreakIterator::preceding() via _breakiter_int32_ret_int32(). */
+U_CFUNC PHP_FUNCTION(breakiter_preceding)
+{
+	int32_t (BreakIterator::*const icu_method)(int32_t) =
+		&BreakIterator::preceding;
+
+	_breakiter_int32_ret_int32("breakiter_preceding", icu_method,
+		INTERNAL_FUNCTION_PARAM_PASSTHRU);
+}
+
+/* Returns the result of BreakIterator::isBoundary() for the given offset as
+ * a PHP boolean. Returns FALSE (with the global intl error set) when the
+ * argument cannot be parsed or does not fit in an int32_t. */
+U_CFUNC PHP_FUNCTION(breakiter_is_boundary)
+{
+	long php_offset;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l",
+			&php_offset) == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_is_boundary: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	/* a PHP long may be wider than the int32_t ICU expects */
+	if (!(php_offset >= INT32_MIN && php_offset <= INT32_MAX)) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_is_boundary: offset argument is outside bounds of "
+			"a 32-bit wide integer", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	const UBool on_boundary = bio->biter->isBoundary((int32_t)php_offset);
+
+	RETURN_BOOL((long)on_boundary);
+}
+
+/* Returns the name of the locale reported by BreakIterator::getLocale().
+ *
+ * The single integer argument must be ULOC_ACTUAL_LOCALE or
+ * ULOC_VALID_LOCALE; any other value, or unparsable arguments, yields FALSE
+ * with the global intl error set. ICU failures are handled by
+ * INTL_METHOD_CHECK_STATUS. */
+U_CFUNC PHP_FUNCTION(breakiter_get_locale)
+{
+	long requested_type;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &requested_type) == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_get_locale: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	if (!(requested_type == ULOC_ACTUAL_LOCALE
+			|| requested_type == ULOC_VALID_LOCALE)) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_get_locale: invalid locale type", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	Locale icu_locale = bio->biter->getLocale(
+		(ULocDataLocaleType)requested_type, BREAKITER_ERROR_CODE(bio));
+	INTL_METHOD_CHECK_STATUS(bio,
+		"breakiter_get_locale: Call to ICU method has failed");
+
+	/* duplicate: the name buffer belongs to the local Locale object */
+	RETURN_STRING(icu_locale.getName(), 1);
+}
+
+/* Fills return_value through IntlIterator_from_BreakIterator_parts().
+ *
+ * The optional integer argument selects the key type and defaults to 0; it
+ * must be one of the three parts_iter_key_type values. Returns FALSE (with
+ * the global intl error set) on bad arguments or an unknown key type. */
+U_CFUNC PHP_FUNCTION(breakiter_get_parts_iterator)
+{
+	long requested_key_type = 0;
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|l", &requested_key_type) == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_get_parts_iterator: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	switch (requested_key_type) {
+	case PARTS_ITERATOR_KEY_SEQUENTIAL:
+	case PARTS_ITERATOR_KEY_LEFT:
+	case PARTS_ITERATOR_KEY_RIGHT:
+		break;
+	default:
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"breakiter_get_parts_iterator: bad key type", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	IntlIterator_from_BreakIterator_parts(
+		object, return_value, (parts_iter_key_type)requested_key_type TSRMLS_CC);
+}
+
+U_CFUNC PHP_FUNCTION(breakiter_get_error_code)
+{
+ BREAKITER_METHOD_INIT_VARS;
+ object = getThis();
+
+ if (zend_parse_parameters_none() == FAILURE) {
+ intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+ "breakiter_get_error_code: bad arguments", 0 TSRMLS_CC);
+ RETURN_FALSE;
+ }
+
+ /* Fetch the object (without resetting its last error code ). */
+ bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+ if (bio == NULL)
+ RETURN_FALSE;
+
+ RETURN_LONG((long)BREAKITER_ERROR_CODE(bio));
+}
+
+U_CFUNC PHP_FUNCTION(breakiter_get_error_message)
+{
+ const char* message = NULL;
+ BREAKITER_METHOD_INIT_VARS;
+ object = getThis();
+
+ if (zend_parse_parameters_none() == FAILURE) {
+ intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
+ "breakiter_get_error_message: bad arguments", 0 TSRMLS_CC );
+ RETURN_FALSE;
+ }
+
+
+ /* Fetch the object (without resetting its last error code ). */
+ bio = (BreakIterator_object*)zend_object_store_get_object(object TSRMLS_CC);
+ if (bio == NULL)
+ RETURN_FALSE;
+
+ /* Return last error message. */
+ message = intl_error_get_message(BREAKITER_ERROR_P(bio) TSRMLS_CC);
+ RETURN_STRING(message, 0);
+}
diff --git a/ext/intl/breakiterator/breakiterator_methods.h b/ext/intl/breakiterator/breakiterator_methods.h
new file mode 100644
index 0000000000..a479ac92e8
--- /dev/null
+++ b/ext/intl/breakiterator/breakiterator_methods.h
@@ -0,0 +1,64 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef BREAKITERATOR_METHODS_H
+#define BREAKITERATOR_METHODS_H
+
+#include <php.h>
+
+/* Constructor of the BreakIterator class. */
+PHP_METHOD(BreakIterator, __construct);
+
+/* Factory functions: each creates a BreakIterator object. */
+PHP_FUNCTION(breakiter_create_word_instance);
+
+PHP_FUNCTION(breakiter_create_line_instance);
+
+PHP_FUNCTION(breakiter_create_character_instance);
+
+PHP_FUNCTION(breakiter_create_sentence_instance);
+
+PHP_FUNCTION(breakiter_create_title_instance);
+
+PHP_FUNCTION(breakiter_create_code_point_instance);
+
+/* Access to the text being iterated over. */
+PHP_FUNCTION(breakiter_get_text);
+
+PHP_FUNCTION(breakiter_set_text);
+
+/* Position queries and movement; these wrap the BreakIterator member
+ * functions of the same name. */
+PHP_FUNCTION(breakiter_first);
+
+PHP_FUNCTION(breakiter_last);
+
+PHP_FUNCTION(breakiter_previous);
+
+PHP_FUNCTION(breakiter_next);
+
+PHP_FUNCTION(breakiter_current);
+
+PHP_FUNCTION(breakiter_following);
+
+PHP_FUNCTION(breakiter_preceding);
+
+PHP_FUNCTION(breakiter_is_boundary);
+
+/* Locale and parts-iterator accessors. */
+PHP_FUNCTION(breakiter_get_locale);
+
+PHP_FUNCTION(breakiter_get_parts_iterator);
+
+/* Accessors for the error state stored on the object. */
+PHP_FUNCTION(breakiter_get_error_code);
+
+PHP_FUNCTION(breakiter_get_error_message);
+
+#endif \ No newline at end of file
diff --git a/ext/intl/breakiterator/codepointiterator_internal.cpp b/ext/intl/breakiterator/codepointiterator_internal.cpp
new file mode 100644
index 0000000000..bf9239d531
--- /dev/null
+++ b/ext/intl/breakiterator/codepointiterator_internal.cpp
@@ -0,0 +1,291 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#include "codepointiterator_internal.h"
+#include <unicode/uchriter.h>
+#include <typeinfo>
+
+//copied from cmemory.h, which is not public
+//UAlignedMemory is only used for its size, which serves as the alignment
+//unit for the macros below (used by createBufferClone())
+typedef union {
+ long t1;
+ double t2;
+ void *t3;
+} UAlignedMemory;
+
+//U_POINTER_MASK_LSB: the low bits of ptr selected by mask
+//U_ALIGNMENT_OFFSET: how far ptr is past the previous alignment boundary
+//  (0 when ptr is aligned)
+//U_ALIGNMENT_OFFSET_UP: sizeof(UAlignedMemory) minus that offset; note that
+//  it evaluates to sizeof(UAlignedMemory), not 0, for an aligned pointer
+#define U_POINTER_MASK_LSB(ptr, mask) (((ptrdiff_t)(char *)(ptr)) & (mask))
+#define U_ALIGNMENT_OFFSET(ptr) U_POINTER_MASK_LSB(ptr, sizeof(UAlignedMemory) - 1)
+#define U_ALIGNMENT_OFFSET_UP(ptr) (sizeof(UAlignedMemory) - U_ALIGNMENT_OFFSET(ptr))
+
+using namespace PHP;
+
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(CodePointBreakIterator);
+
+//Default constructor: no character iterator, no last code point, and fText
+//opened over a zero-length UChar buffer. The status of that open call is
+//not inspected.
+CodePointBreakIterator::CodePointBreakIterator()
+	: BreakIterator(),
+	  fCharIter(NULL),
+	  lastCodePoint(U_SENTINEL)
+{
+	UErrorCode open_status = UErrorCode();
+
+	this->fText = utext_openUChars(NULL, NULL, 0, &open_status);
+}
+
+//Copy constructor: starts from an empty state (fText and fCharIter NULL)
+//and lets operator= do the actual copying.
+CodePointBreakIterator::CodePointBreakIterator(const PHP::CodePointBreakIterator &other)
+	: BreakIterator(other),
+	  fText(NULL),
+	  fCharIter(NULL),
+	  lastCodePoint(U_SENTINEL)
+{
+	this->operator=(other);
+}
+
+//Assignment: clones that.fText into this->fText (shallow, read-only clone,
+//reusing the current fText as fill-in), drops any cached character iterator
+//and copies lastCodePoint. Self-assignment is a no-op. The status of the
+//clone is not inspected.
+CodePointBreakIterator& CodePointBreakIterator::operator=(const CodePointBreakIterator& that)
+{
+	UErrorCode uec = UErrorCode();
+
+	if (this == &that) {
+		return *this;
+	}
+
+	this->fText = utext_clone(this->fText, that.fText, FALSE, TRUE, &uec);
+
+	//don't bother copying the character iterator, getText() is deprecated
+	clearCurrentCharIter();
+
+	//must come after clearCurrentCharIter(), which resets lastCodePoint
+	this->lastCodePoint = that.lastCodePoint;
+	return *this;
+}
+
+//Destructor: closes the UText (if one is open) and then deletes the
+//character iterator.
+CodePointBreakIterator::~CodePointBreakIterator()
+{
+	if (this->fText != NULL) {
+		utext_close(this->fText);
+	}
+
+	clearCurrentCharIter();
+}
+
+//Two iterators are equal when they have the same dynamic type and
+//utext_equals() reports their texts as equal.
+UBool CodePointBreakIterator::operator==(const BreakIterator& that) const
+{
+	if (typeid(*this) != typeid(that)) {
+		return FALSE;
+	}
+
+	//safe after the typeid check above
+	const CodePointBreakIterator& other =
+		static_cast<const CodePointBreakIterator&>(that);
+
+	return utext_equals(this->fText, other.fText) ? TRUE : FALSE;
+}
+
+//Returns a heap-allocated copy made with the copy constructor.
+CodePointBreakIterator* CodePointBreakIterator::clone(void) const
+{
+	CodePointBreakIterator *copy = new CodePointBreakIterator(*this);
+
+	return copy;
+}
+
+//Returns the stored character iterator. If none is stored, a bogus
+//zero-length iterator is created and cached first (this method is
+//deprecated anyway).
+CharacterIterator& CodePointBreakIterator::getText(void) const
+{
+	if (this->fCharIter != NULL) {
+		return *this->fCharIter;
+	}
+
+	static const UChar empty_text = 0;
+	this->fCharIter = new UCharCharacterIterator(&empty_text, 0);
+
+	return *this->fCharIter;
+}
+
+//Returns a shallow, read-only clone of fText, using fillIn as the
+//destination; errors are reported through status.
+UText *CodePointBreakIterator::getUText(UText *fillIn, UErrorCode &status) const
+{
+	UText *text_clone = utext_clone(fillIn, this->fText, FALSE, TRUE, &status);
+
+	return text_clone;
+}
+
+//Points fText at the given UnicodeString and drops the cached character
+//iterator. The status of the open call is not inspected.
+void CodePointBreakIterator::setText(const UnicodeString &text)
+{
+	UErrorCode open_status = UErrorCode();
+
+	//passing the current fText as the first argument closes the previous
+	//utext, if any
+	this->fText = utext_openConstUnicodeString(this->fText, &text,
+		&open_status);
+
+	clearCurrentCharIter();
+}
+
+//Replaces fText with a shallow, read-only clone of text and drops the
+//cached character iterator. Does nothing if status already indicates
+//failure.
+void CodePointBreakIterator::setText(UText *text, UErrorCode &status)
+{
+	if (!U_FAILURE(status)) {
+		this->fText = utext_clone(this->fText, text, FALSE, TRUE, &status);
+
+		clearCurrentCharIter();
+	}
+}
+
+//Deletes the previously stored character iterator, stores it in its place
+//and reopens fText over it. The status of the open call is not inspected.
+void CodePointBreakIterator::adoptText(CharacterIterator* it)
+{
+	clearCurrentCharIter();
+
+	UErrorCode open_status = UErrorCode();
+
+	this->fCharIter = it;
+	this->fText = utext_openCharacterIterator(this->fText, it, &open_status);
+}
+
+//Moves to native index 0, forgets the last code point and returns 0.
+int32_t CodePointBreakIterator::first(void)
+{
+	this->lastCodePoint = U_SENTINEL;
+	UTEXT_SETNATIVEINDEX(this->fText, 0);
+
+	return 0;
+}
+
+//Moves to the native length of the text, forgets the last code point and
+//returns that length (truncated to int32_t).
+int32_t CodePointBreakIterator::last(void)
+{
+	const int32_t end_index = (int32_t)utext_nativeLength(this->fText);
+
+	UTEXT_SETNATIVEINDEX(this->fText, end_index);
+	this->lastCodePoint = U_SENTINEL;
+
+	return end_index;
+}
+
+//Reads the code point before the current position with UTEXT_PREVIOUS32
+//and remembers it. Returns the resulting native index, or DONE when the
+//read yielded U_SENTINEL.
+int32_t CodePointBreakIterator::previous(void)
+{
+	const UChar32 cp = UTEXT_PREVIOUS32(this->fText);
+
+	this->lastCodePoint = cp;
+	if (cp != U_SENTINEL) {
+		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
+	}
+
+	return BreakIterator::DONE;
+}
+
+//Reads the code point at the current position with UTEXT_NEXT32 and
+//remembers it. Returns the resulting native index, or DONE when the read
+//yielded U_SENTINEL.
+int32_t CodePointBreakIterator::next(void)
+{
+	const UChar32 cp = UTEXT_NEXT32(this->fText);
+
+	this->lastCodePoint = cp;
+	if (cp != U_SENTINEL) {
+		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
+	}
+
+	return BreakIterator::DONE;
+}
+
+//Returns the current native index of fText, truncated to int32_t.
+int32_t CodePointBreakIterator::current(void) const
+{
+	const int64_t native_index = UTEXT_GETNATIVEINDEX(this->fText);
+
+	return (int32_t)native_index;
+}
+
+//Reads the code point starting at offset with utext_next32From() and
+//remembers it. Returns the resulting native index, or DONE when the read
+//yielded U_SENTINEL.
+int32_t CodePointBreakIterator::following(int32_t offset)
+{
+	const UChar32 cp = utext_next32From(this->fText, offset);
+
+	this->lastCodePoint = cp;
+	if (cp != U_SENTINEL) {
+		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
+	}
+
+	return BreakIterator::DONE;
+}
+
+//Reads the code point before offset with utext_previous32From() and
+//remembers it. Returns the resulting native index, or DONE when the read
+//yielded U_SENTINEL.
+int32_t CodePointBreakIterator::preceding(int32_t offset)
+{
+	const UChar32 cp = utext_previous32From(this->fText, offset);
+
+	this->lastCodePoint = cp;
+	if (cp != U_SENTINEL) {
+		return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
+	}
+
+	return BreakIterator::DONE;
+}
+
+//Sets the native index to offset and reports whether the index that was
+//actually set equals offset. Moving the iterator is intentional: this
+//function has side effects, and it's supposed to.
+UBool CodePointBreakIterator::isBoundary(int32_t offset)
+{
+	utext_setNativeIndex(this->fText, offset);
+
+	const int64_t resulting_index = utext_getNativeIndex(this->fText);
+
+	return (offset == resulting_index);
+}
+
+//Moves the index by n code points with utext_moveIndex32(). On success the
+//code point at the new position is remembered and the new native index is
+//returned; otherwise the last code point is reset and DONE is returned.
+int32_t CodePointBreakIterator::next(int32_t n)
+{
+//fall back to the function when the macro form is not available
+#ifndef UTEXT_CURRENT32
+#define UTEXT_CURRENT32 utext_current32
+#endif
+
+	const UBool moved = utext_moveIndex32(this->fText, n);
+
+	if (!moved) {
+		this->lastCodePoint = U_SENTINEL;
+		return BreakIterator::DONE;
+	}
+
+	this->lastCodePoint = UTEXT_CURRENT32(this->fText);
+	return (int32_t)UTEXT_GETNATIVEINDEX(this->fText);
+}
+
+//Clones this iterator, into the caller-supplied buffer when it is large
+//enough and on the heap otherwise.
+//
+//stackBuffer  caller-owned memory for the clone; may be NULL
+//bufferSize   size of stackBuffer in bytes; when <= 0 it is set to the size
+//             a buffer needs (including alignment slack) and NULL is
+//             returned
+//status       set to U_SAFECLONE_ALLOCATED_WARNING when the clone had to be
+//             heap-allocated, or U_MEMORY_ALLOCATION_ERROR when that
+//             allocation failed; nothing is done if it is already a failure
+//
+//see implementation of RuleBasedBreakIterator::createBufferClone()
+CodePointBreakIterator *CodePointBreakIterator::createBufferClone(
+		void *stackBuffer, int32_t &bufferSize, UErrorCode &status)
+{
+	if (U_FAILURE(status)) {
+		return NULL;
+	}
+
+	if (bufferSize <= 0) {
+		bufferSize = sizeof(CodePointBreakIterator) + U_ALIGNMENT_OFFSET_UP(0);
+		return NULL;
+	}
+
+	char *buf = (char*)stackBuffer;
+	uint32_t s = bufferSize;
+
+	if (stackBuffer == NULL) {
+		s = 0; //forces the heap allocation below
+	}
+
+	if (U_ALIGNMENT_OFFSET(stackBuffer) != 0) {
+		uint32_t offsetUp = (uint32_t)U_ALIGNMENT_OFFSET_UP(buf);
+		if (s < offsetUp) {
+			//the buffer is smaller than the padding needed to align it;
+			//an unsigned subtraction would wrap around and make the
+			//buffer look huge, so treat it as empty instead
+			s = 0;
+		} else {
+			s -= offsetUp;
+			buf += offsetUp;
+		}
+	}
+
+	if (s < sizeof(CodePointBreakIterator)) {
+		CodePointBreakIterator *clonedBI = new CodePointBreakIterator(*this);
+		if (clonedBI == NULL) {
+			status = U_MEMORY_ALLOCATION_ERROR;
+		} else {
+			status = U_SAFECLONE_ALLOCATED_WARNING;
+		}
+
+		return clonedBI;
+	}
+
+	//placement new into the aligned part of the caller's buffer
+	return new(buf) CodePointBreakIterator(*this);
+}
+
+//Replaces fText with a shallow, read-only clone of input while keeping the
+//current position. status is set to U_ILLEGAL_ARGUMENT_ERROR when input is
+//NULL or when the old position cannot be restored in the new text; nothing
+//is done if status already indicates failure.
+//
+//see implementation of RuleBasedBreakIterator::refreshInputText()
+CodePointBreakIterator &CodePointBreakIterator::refreshInputText(UText *input, UErrorCode &status)
+{
+	if (U_FAILURE(status)) {
+		return *this;
+	}
+	if (input == NULL) {
+		status = U_ILLEGAL_ARGUMENT_ERROR;
+		return *this;
+	}
+
+	//remember where we are before the text is swapped
+	const int64_t saved_index = utext_getNativeIndex(this->fText);
+
+	this->fText = utext_clone(this->fText, input, FALSE, TRUE, &status);
+	if (U_FAILURE(status)) {
+		return *this;
+	}
+
+	utext_setNativeIndex(this->fText, saved_index);
+	if (utext_getNativeIndex(this->fText) != saved_index) {
+		status = U_ILLEGAL_ARGUMENT_ERROR;
+	}
+
+	return *this;
+}
diff --git a/ext/intl/breakiterator/codepointiterator_internal.h b/ext/intl/breakiterator/codepointiterator_internal.h
new file mode 100644
index 0000000000..988b91c200
--- /dev/null
+++ b/ext/intl/breakiterator/codepointiterator_internal.h
@@ -0,0 +1,98 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef CODEPOINTITERATOR_INTERNAL_H
+#define CODEPOINTITERATOR_INTERNAL_H
+
+#include <unicode/brkiter.h>
+
+using U_ICU_NAMESPACE::BreakIterator;
+
+namespace PHP {
+
+	/* BreakIterator subclass that moves through a UText one code point at a
+	 * time and remembers the code point read by the most recent move (see
+	 * getLastCodePoint()). */
+	class CodePointBreakIterator : public BreakIterator {
+
+	public:
+		static UClassID getStaticClassID();
+
+		CodePointBreakIterator();
+
+		CodePointBreakIterator(const CodePointBreakIterator &other);
+
+		CodePointBreakIterator& operator=(const CodePointBreakIterator& that);
+
+		virtual ~CodePointBreakIterator();
+
+		virtual UBool operator==(const BreakIterator& that) const;
+
+		virtual CodePointBreakIterator* clone(void) const;
+
+		virtual UClassID getDynamicClassID(void) const;
+
+		virtual CharacterIterator& getText(void) const;
+
+		virtual UText *getUText(UText *fillIn, UErrorCode &status) const;
+
+		virtual void setText(const UnicodeString &text);
+
+		virtual void setText(UText *text, UErrorCode &status);
+
+		virtual void adoptText(CharacterIterator* it);
+
+		virtual int32_t first(void);
+
+		virtual int32_t last(void);
+
+		virtual int32_t previous(void);
+
+		virtual int32_t next(void);
+
+		virtual int32_t current(void) const;
+
+		virtual int32_t following(int32_t offset);
+
+		virtual int32_t preceding(int32_t offset);
+
+		virtual UBool isBoundary(int32_t offset);
+
+		virtual int32_t next(int32_t n);
+
+		virtual CodePointBreakIterator *createBufferClone(void *stackBuffer,
+			int32_t &BufferSize,
+			UErrorCode &status);
+
+		virtual CodePointBreakIterator &refreshInputText(UText *input, UErrorCode &status);
+
+		/* Code point stored by the last move, or U_SENTINEL when there is
+		 * none. Const so that it can also be called on a const iterator. */
+		inline UChar32 getLastCodePoint() const
+		{
+			return this->lastCodePoint;
+		}
+
+	private:
+		/* Declared in the same order as the constructors' initializer
+		 * lists (fText, fCharIter, lastCodePoint), so that the written
+		 * order and the actual initialization order agree. */
+		UText *fText;
+		mutable CharacterIterator *fCharIter; /* mutable: set lazily by the const getText() */
+		UChar32 lastCodePoint;
+
+		/* Deletes the character iterator and resets the last code point. */
+		inline void clearCurrentCharIter()
+		{
+			delete this->fCharIter;
+			this->fCharIter = NULL;
+			this->lastCodePoint = U_SENTINEL;
+		}
+	};
+}
+
+#endif \ No newline at end of file
diff --git a/ext/intl/breakiterator/codepointiterator_methods.cpp b/ext/intl/breakiterator/codepointiterator_methods.cpp
new file mode 100644
index 0000000000..ae7e526ead
--- /dev/null
+++ b/ext/intl/breakiterator/codepointiterator_methods.cpp
@@ -0,0 +1,44 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#include "codepointiterator_internal.h"
+
+extern "C" {
+#define USE_BREAKITERATOR_POINTER 1
+#include "breakiterator_class.h"
+}
+
+using PHP::CodePointBreakIterator;
+
+/* Returns bio->biter cast to CodePointBreakIterator*. The cast is unchecked:
+ * the caller must know the object wraps an iterator of that type. */
+static inline CodePointBreakIterator *fetch_cpbi(BreakIterator_object *bio) {
+	CodePointBreakIterator *cpbi = (CodePointBreakIterator*)bio->biter;
+
+	return cpbi;
+}
+
+/* Returns CodePointBreakIterator::getLastCodePoint() of the wrapped iterator
+ * as a PHP integer; FALSE on bad arguments. */
+U_CFUNC PHP_FUNCTION(cpbi_get_last_code_point)
+{
+	BREAKITER_METHOD_INIT_VARS;
+	object = getThis();
+
+	if (zend_parse_parameters_none() == FAILURE) {
+		intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+			"cpbi_get_last_code_point: bad arguments", 0 TSRMLS_CC);
+		RETURN_FALSE;
+	}
+
+	BREAKITER_METHOD_FETCH_OBJECT;
+
+	CodePointBreakIterator *cpbi = fetch_cpbi(bio);
+
+	RETURN_LONG(cpbi->getLastCodePoint());
+}
\ No newline at end of file
diff --git a/ext/intl/breakiterator/codepointiterator_methods.h b/ext/intl/breakiterator/codepointiterator_methods.h
new file mode 100644
index 0000000000..d34e5b61e2
--- /dev/null
+++ b/ext/intl/breakiterator/codepointiterator_methods.h
@@ -0,0 +1,24 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#ifndef CODEPOINTITERATOR_METHODS_H
+#define CODEPOINTITERATOR_METHODS_H
+
+#include <php.h>
+
+/* Returns the last code point recorded by the wrapped CodePointBreakIterator. */
+PHP_FUNCTION(cpbi_get_last_code_point);
+
+#endif \ No newline at end of file
diff --git a/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp b/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp
new file mode 100644
index 0000000000..454e5249fd
--- /dev/null
+++ b/ext/intl/breakiterator/rulebasedbreakiterator_methods.cpp
@@ -0,0 +1,221 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+#include <unicode/rbbi.h>
+
+extern "C" {
+#define USE_BREAKITERATOR_POINTER 1
+#include "breakiterator_class.h"
+#include <zend_exceptions.h>
+#include <limits.h>
+}
+
+#include "../intl_convertcpp.h"
+
+/* Reinterprets the iterator stored in the PHP object as a
+ * RuleBasedBreakIterator. The cast is unchecked. */
+static inline RuleBasedBreakIterator *fetch_rbbi(BreakIterator_object *bio) {
+    RuleBasedBreakIterator *rbbi = (RuleBasedBreakIterator*)bio->biter;
+    return rbbi;
+}
+
+/*
+ * Shared body of IntlRuleBasedBreakIterator::__construct.
+ *
+ * PHP parameters ("s|b"):
+ *   rules     the rule set, either as text or as binary (compiled) data
+ *   compiled  optional flag, default false; when true `rules` is treated as
+ *             binary rules (only supported when built against ICU >= 4.8)
+ *
+ * On success the newly created RuleBasedBreakIterator is handed to
+ * breakiterator_object_create() together with return_value. On every failure
+ * path an intl error is recorded and return_value is set to NULL via
+ * RETURN_NULL(); the caller inspects return_value to detect the failure.
+ */
+static void _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAMETERS)
+{
+    char *rules;
+    int rules_len;
+    zend_bool compiled = 0;
+    UErrorCode status = U_ZERO_ERROR;
+    intl_error_reset(NULL TSRMLS_CC);
+
+    if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|b",
+            &rules, &rules_len, &compiled) == FAILURE) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "rbbi_create_instance: bad arguments", 0 TSRMLS_CC);
+        RETURN_NULL();
+    }
+
+    // instantiation of ICU object
+    RuleBasedBreakIterator *rbbi;
+
+    if (!compiled) {
+        // Textual rules: convert them to a UnicodeString, then let ICU
+        // parse them.
+        UnicodeString rulesStr;
+        UParseError parseError = UParseError();
+        if (intl_stringFromChar(rulesStr, rules, rules_len, &status)
+                == FAILURE) {
+            intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+                "rbbi_create_instance: rules were not a valid UTF-8 string",
+                0 TSRMLS_CC);
+            RETURN_NULL();
+        }
+
+        rbbi = new RuleBasedBreakIterator(rulesStr, parseError, status);
+        intl_error_set_code(NULL, status TSRMLS_CC);
+        if (U_FAILURE(status)) {
+            // Include the textual form of the parse error in the message.
+            char *msg;
+            smart_str parse_error_str;
+            parse_error_str = intl_parse_error_to_string(&parseError);
+            spprintf(&msg, 0, "rbbi_create_instance: unable to create "
+                "RuleBasedBreakIterator from rules (%s)", parse_error_str.c);
+            smart_str_free(&parse_error_str);
+            intl_error_set_custom_msg(NULL, msg, 1 TSRMLS_CC);
+            // msg is released right away; presumably the `1` above asks for
+            // the message to be copied -- confirm against intl_error.h.
+            efree(msg);
+            delete rbbi;
+            RETURN_NULL();
+        }
+    } else { // compiled
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
+        rbbi = new RuleBasedBreakIterator((uint8_t*)rules, rules_len, status);
+        if (U_FAILURE(status)) {
+            intl_error_set(NULL, status, "rbbi_create_instance: unable to "
+                "create instance from compiled rules", 0 TSRMLS_CC);
+            delete rbbi;
+            RETURN_NULL();
+        }
+#else
+        // rbbi is never assigned in this configuration, but this branch
+        // always returns before it could be used.
+        intl_error_set(NULL, U_UNSUPPORTED_ERROR, "rbbi_create_instance: "
+            "compiled rules require ICU >= 4.8", 0 TSRMLS_CC);
+        RETURN_NULL();
+#endif
+    }
+
+    breakiterator_object_create(return_value, rbbi TSRMLS_CC);
+}
+
+/* IntlRuleBasedBreakIterator::__construct
+ * Runs the constructor body with return_value aliased to $this. A copy of
+ * the $this zval is taken beforehand so the object can still be reached for
+ * cleanup if the body turns $this into NULL. */
+U_CFUNC PHP_METHOD(IntlRuleBasedBreakIterator, __construct)
+{
+    zval *self = getThis();
+    zval saved_this = *self;
+
+    return_value = self;
+    // on error the body sets this zval to IS_NULL without destroying it first
+    _php_intlrbbi_constructor_body(INTERNAL_FUNCTION_PARAM_PASSTHRU);
+
+    if (Z_TYPE_P(return_value) != IS_NULL) {
+        return;
+    }
+
+    // Construction failed: flag the object through the saved copy, then
+    // destroy that copy.
+    zend_object_store_ctor_failed(&saved_this TSRMLS_CC);
+    zval_dtor(&saved_this);
+}
+
+/* Takes no arguments. Returns the iterator's rules (as reported by
+ * RuleBasedBreakIterator::getRules()) converted with intl_charFromString().
+ * Returns FALSE and records an intl error when parameter parsing or the
+ * string conversion fails. */
+U_CFUNC PHP_FUNCTION(rbbi_get_rules)
+{
+    BREAKITER_METHOD_INIT_VARS;
+    object = getThis();
+
+    if (zend_parse_parameters_none() == FAILURE) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "rbbi_get_rules: bad arguments", 0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    BREAKITER_METHOD_FETCH_OBJECT;
+
+    // Bind by reference: the string is only read here, so no copy is needed.
+    const UnicodeString &rules = fetch_rbbi(bio)->getRules();
+
+    // The converted buffer and its length are written straight into
+    // return_value; RETURN_FALSE below overrides the type on failure.
+    Z_TYPE_P(return_value) = IS_STRING;
+    if (intl_charFromString(rules, &Z_STRVAL_P(return_value),
+            &Z_STRLEN_P(return_value), BREAKITER_ERROR_CODE_P(bio)) == FAILURE)
+    {
+        intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
+            "rbbi_get_rules: Error converting result to UTF-8 string",
+            0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+}
+
+/* Takes no arguments. Returns, as a PHP integer, the value of
+ * RuleBasedBreakIterator::getRuleStatus() for this object, or FALSE (with
+ * U_ILLEGAL_ARGUMENT_ERROR recorded) when parameter parsing fails. */
+U_CFUNC PHP_FUNCTION(rbbi_get_rule_status)
+{
+    BREAKITER_METHOD_INIT_VARS;
+    object = getThis();
+
+    if (FAILURE == zend_parse_parameters_none()) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "rbbi_get_rule_status: bad arguments", 0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    BREAKITER_METHOD_FETCH_OBJECT;
+
+    RuleBasedBreakIterator *rbbi = fetch_rbbi(bio);
+    RETURN_LONG(rbbi->getRuleStatus());
+}
+
+/* Takes no arguments. Returns a PHP array with the status values obtained
+ * from RuleBasedBreakIterator::getRuleStatusVec(), or FALSE with an intl
+ * error recorded on failure.
+ * The ICU method is called twice: once with a zero-capacity buffer to learn
+ * how many values there are, then again with a buffer of that size. */
+U_CFUNC PHP_FUNCTION(rbbi_get_rule_status_vec)
+{
+    BREAKITER_METHOD_INIT_VARS;
+    object = getThis();
+
+    if (FAILURE == zend_parse_parameters_none()) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "rbbi_get_rule_status_vec: bad arguments", 0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    BREAKITER_METHOD_FETCH_OBJECT;
+
+    RuleBasedBreakIterator *rbbi = fetch_rbbi(bio);
+
+    // Sizing call: an overflow status is the expected outcome here.
+    int32_t count = rbbi->getRuleStatusVec(NULL, 0,
+        BREAKITER_ERROR_CODE(bio));
+    if (BREAKITER_ERROR_CODE(bio) != U_BUFFER_OVERFLOW_ERROR) {
+        // should not happen
+        INTL_METHOD_CHECK_STATUS(bio, "rbbi_get_rule_status_vec: failed "
+            " determining the number of status values");
+    } else {
+        BREAKITER_ERROR_CODE(bio) = U_ZERO_ERROR;
+    }
+
+    // Retrieval call with a buffer of the reported size.
+    int32_t *values = new int32_t[count];
+    count = rbbi->getRuleStatusVec(values, count,
+        BREAKITER_ERROR_CODE(bio));
+    if (U_FAILURE(BREAKITER_ERROR_CODE(bio))) {
+        delete[] values;
+        intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
+            "rbbi_get_rule_status_vec: failed obtaining the status values",
+            0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    array_init_size(return_value, count);
+    for (int32_t idx = 0; idx < count; ++idx) {
+        add_next_index_long(return_value, values[idx]);
+    }
+    delete[] values;
+}
+
+#if U_ICU_VERSION_MAJOR_NUM * 10 + U_ICU_VERSION_MINOR_NUM >= 48
+/* Takes no arguments. Returns a PHP string holding a copy of the data from
+ * RuleBasedBreakIterator::getBinaryRules(), or FALSE with an intl error
+ * recorded when parameter parsing fails or the data is longer than
+ * INT_MAX - 1 bytes. Only compiled when building against ICU >= 4.8. */
+U_CFUNC PHP_FUNCTION(rbbi_get_binary_rules)
+{
+    BREAKITER_METHOD_INIT_VARS;
+    object = getThis();
+
+    if (FAILURE == zend_parse_parameters_none()) {
+        intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR,
+            "rbbi_get_binary_rules: bad arguments", 0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    BREAKITER_METHOD_FETCH_OBJECT;
+
+    uint32_t bin_len;
+    const uint8_t *bin = fetch_rbbi(bio)->getBinaryRules(bin_len);
+
+    // Reject lengths that would leave no room for the terminator below.
+    if (bin_len > INT_MAX - 1) {
+        intl_errors_set(BREAKITER_ERROR_P(bio), BREAKITER_ERROR_CODE(bio),
+            "rbbi_get_binary_rules: the rules are too large",
+            0 TSRMLS_CC);
+        RETURN_FALSE;
+    }
+
+    // Copy into an emalloc'ed, NUL-terminated buffer that is handed to
+    // return_value without further duplication (last argument 0).
+    char *copy = static_cast<char*>(emalloc(bin_len + 1));
+    memcpy(copy, bin, bin_len);
+    copy[bin_len] = '\0';
+
+    RETURN_STRINGL(copy, bin_len, 0);
+}
+#endif
diff --git a/ext/intl/breakiterator/rulebasedbreakiterator_methods.h b/ext/intl/breakiterator/rulebasedbreakiterator_methods.h
new file mode 100644
index 0000000000..edea4ea2a6
--- /dev/null
+++ b/ext/intl/breakiterator/rulebasedbreakiterator_methods.h
@@ -0,0 +1,32 @@
+/*
+ +----------------------------------------------------------------------+
+ | PHP Version 5 |
+ +----------------------------------------------------------------------+
+ | This source file is subject to version 3.01 of the PHP license, |
+ | that is bundled with this package in the file LICENSE, and is |
+ | available through the world-wide-web at the following url: |
+ | http://www.php.net/license/3_01.txt |
+ | If you did not receive a copy of the PHP license and are unable to |
+ | obtain it through the world-wide-web, please send a note to |
+ | license@php.net so we can mail you a copy immediately. |
+ +----------------------------------------------------------------------+
+ | Authors: Gustavo Lopes <cataphract@php.net> |
+ +----------------------------------------------------------------------+
+ */
+
+/* Declarations of the PHP-visible constructor and functions specific to the
+ * rule-based break iterator. */
+#ifndef RULEBASEDBREAKITERATOR_METHODS_H
+#define RULEBASEDBREAKITERATOR_METHODS_H
+
+#include <php.h>
+
+/* Constructor; accepts a rules string and an optional boolean flag saying
+ * whether the rules are in compiled (binary) form. */
+PHP_METHOD(IntlRuleBasedBreakIterator, __construct);
+
+/* Returns the iterator's rules as a string, or FALSE on error. */
+PHP_FUNCTION(rbbi_get_rules);
+
+/* Returns the iterator's getRuleStatus() value as an integer. */
+PHP_FUNCTION(rbbi_get_rule_status);
+
+/* Returns an array with the values from getRuleStatusVec(), or FALSE on
+ * error. */
+PHP_FUNCTION(rbbi_get_rule_status_vec);
+
+/* Returns the binary rules as a string, or FALSE on error.
+ * NOTE: declared unconditionally here, but the definition is only compiled
+ * when building against ICU >= 4.8. */
+PHP_FUNCTION(rbbi_get_binary_rules);
+
+#endif \ No newline at end of file