From 97eff7eb57fc2320c267a949cffd622c38712484 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Sun, 22 May 2016 17:49:02 -0700 Subject: Fix bug #72241: get_icu_value_internal out-of-bounds read --- ext/intl/locale/locale_methods.c | 235 ++++++++++++++++++++------------------- ext/intl/tests/bug72241.phpt | 14 +++ 2 files changed, 132 insertions(+), 117 deletions(-) create mode 100644 ext/intl/tests/bug72241.phpt diff --git a/ext/intl/locale/locale_methods.c b/ext/intl/locale/locale_methods.c index c8159bcd5a..31f60b39a4 100644 --- a/ext/intl/locale/locale_methods.c +++ b/ext/intl/locale/locale_methods.c @@ -65,26 +65,26 @@ ZEND_EXTERN_MODULE_GLOBALS( intl ) */ static const char * const LOC_GRANDFATHERED[] = { "art-lojban", "i-klingon", "i-lux", "i-navajo", "no-bok", "no-nyn", - "cel-gaulish", "en-GB-oed", "i-ami", - "i-bnn", "i-default", "i-enochian", - "i-mingo", "i-pwn", "i-tao", + "cel-gaulish", "en-GB-oed", "i-ami", + "i-bnn", "i-default", "i-enochian", + "i-mingo", "i-pwn", "i-tao", "i-tay", "i-tsu", "sgn-BE-fr", "sgn-BE-nl", "sgn-CH-de", "zh-cmn", "zh-cmn-Hans", "zh-cmn-Hant", "zh-gan" , "zh-guoyu", "zh-hakka", "zh-min", - "zh-min-nan", "zh-wuu", "zh-xiang", + "zh-min-nan", "zh-wuu", "zh-xiang", "zh-yue", NULL }; /* Based on IANA registry at the time of writing this code * This array lists the preferred values for the grandfathered tags if applicable -* This is in sync with the array LOC_GRANDFATHERED +* This is in sync with the array LOC_GRANDFATHERED * e.g. the offsets of the grandfathered tags match the offset of the preferred value */ static const int LOC_PREFERRED_GRANDFATHERED_LEN = 6; static const char * const LOC_PREFERRED_GRANDFATHERED[] = { "jbo", "tlh", "lb", - "nv", "nb", "nn", + "nv", "nb", "nn", NULL }; @@ -122,7 +122,7 @@ static int16_t findOffset(const char* const* list, const char* key) /*}}}*/ static char* getPreferredTag(const char* gf_tag) -{ +{ char* result = NULL; int grOffset = 0; @@ -141,15 +141,15 @@ static char* getPreferredTag(const char* gf_tag) } /* {{{ -* returns the position of next token for lookup +* returns the position of next token for lookup * or -1 if no token -* strtokr equivalent search for token in reverse direction +* strtokr equivalent search for token in reverse direction */ static int getStrrtokenPos(char* str, int savedPos) { int result =-1; int i; - + for(i=savedPos-1; i>=0; i--) { if(isIDSeparator(*(str+i)) ){ /* delimiter found; check for singleton */ @@ -171,7 +171,7 @@ static int getStrrtokenPos(char* str, int savedPos) /* }}} */ /* {{{ -* returns the position of a singleton if present +* returns the position of a singleton if present * returns -1 if no singleton * strtok equivalent search for singleton */ @@ -180,7 +180,7 @@ static int getSingletonPos(const char* str) int result =-1; int i=0; int len = 0; - + if( str && ((len=strlen(str))>0) ){ for( i=0; ic, (s)->len, 0) -/* {{{ proto static string Locale::composeLocale($array) -* Creates a locale by combining the parts of locale-ID passed +/* {{{ proto static string Locale::composeLocale($array) +* Creates a locale by combining the parts of locale-ID passed * }}} */ -/* {{{ proto static string compose_locale($array) -* Creates a locale by combining the parts of locale-ID passed +/* {{{ proto static string compose_locale($array) +* Creates a locale by combining the parts of locale-ID passed * }}} */ PHP_FUNCTION(locale_compose) { @@ -920,7 +921,7 @@ PHP_FUNCTION(locale_compose) RETURN_FALSE; /* Check for grandfathered first */ - result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG); + result = append_key_value(loc_name, hash_arr, LOC_GRANDFATHERED_LANG_TAG); if( result == SUCCESS){ RETURN_SMART_STR(loc_name); } @@ -929,7 +930,7 @@ PHP_FUNCTION(locale_compose) } /* Not grandfathered */ - result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG); + result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG); if( result == LOC_NOT_FOUND ){ intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC ); @@ -947,11 +948,11 @@ PHP_FUNCTION(locale_compose) } /* Script */ - result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG); + result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG); if( !handleAppendResult( result, loc_name TSRMLS_CC)){ RETURN_FALSE; } - + /* Region */ result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG); if( !handleAppendResult( result, loc_name TSRMLS_CC)){ @@ -959,7 +960,7 @@ PHP_FUNCTION(locale_compose) } /* Variant */ - result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC); + result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC); if( !handleAppendResult( result, loc_name TSRMLS_CC)){ RETURN_FALSE; } @@ -985,16 +986,16 @@ static char* get_private_subtags(const char* loc_name) { char* result =NULL; int singletonPos = 0; - int len =0; + int len =0; const char* mod_loc_name =NULL; if( loc_name && (len = strlen(loc_name)>0 ) ){ - mod_loc_name = loc_name ; + mod_loc_name = loc_name ; len = strlen(mod_loc_name); while( (singletonPos = getSingletonPos(mod_loc_name))!= -1){ - if( singletonPos!=-1){ - if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){ + if( singletonPos!=-1){ + if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){ /* private subtag start found */ if( singletonPos + 2 == len){ /* loc_name ends with '-x-' ; return NULL */ @@ -1019,7 +1020,7 @@ static char* get_private_subtags(const char* loc_name) } /* end of while */ } - + return result; } /* }}} */ @@ -1044,20 +1045,20 @@ static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name } else { key_value = get_icu_value_internal( loc_name , key_name , &result,1 ); } - if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) || + if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) || ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){ if( result > 0 && key_value){ /* Tokenize on the "_" or "-" */ - token = php_strtok_r( key_value , DELIMITER ,&last_ptr); + token = php_strtok_r( key_value , DELIMITER ,&last_ptr); if( cur_key_name ){ efree( cur_key_name); } cur_key_name = (char*)ecalloc( 25, 25); - sprintf( cur_key_name , "%s%d", key_name , cnt++); + sprintf( cur_key_name , "%s%d", key_name , cnt++); add_assoc_string( hash_arr, cur_key_name , token ,TRUE ); /* tokenize on the "_" or "-" and stop at singleton if any */ while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){ - sprintf( cur_key_name , "%s%d", key_name , cnt++); + sprintf( cur_key_name , "%s%d", key_name , cnt++); add_assoc_string( hash_arr, cur_key_name , token , TRUE ); } /* @@ -1077,16 +1078,16 @@ static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name } /*if( key_name != LOC_PRIVATE_TAG && key_value){*/ if( key_value){ - efree(key_value); + efree(key_value); } return cur_result; } /* }}} */ -/* {{{ proto static array Locale::parseLocale($locale) +/* {{{ proto static array Locale::parseLocale($locale) * parses a locale-id into an array the different parts of it }}} */ -/* {{{ proto static array parse_locale($locale) +/* {{{ proto static array parse_locale($locale) * parses a locale-id into an array the different parts of it */ PHP_FUNCTION(locale_parse) @@ -1144,7 +1145,7 @@ PHP_FUNCTION(locale_get_all_variants) char* saved_ptr = NULL; intl_error_reset( NULL TSRMLS_CC ); - + if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &loc_name, &loc_name_len ) == FAILURE) { @@ -1162,15 +1163,15 @@ PHP_FUNCTION(locale_get_all_variants) array_init( return_value ); /* If the locale is grandfathered, stop, no variants */ - if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){ + if( findOffset( LOC_GRANDFATHERED , loc_name ) >= 0 ){ /* ("Grandfathered Tag. No variants."); */ } - else { + else { /* Call ICU variant */ variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0); if( result > 0 && variant){ /* Tokenize on the "_" or "-" */ - token = php_strtok_r( variant , DELIMITER , &saved_ptr); + token = php_strtok_r( variant , DELIMITER , &saved_ptr); add_next_index_stringl( return_value, token , strlen(token) ,TRUE ); /* tokenize on the "_" or "-" and stop at singleton if any */ while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){ @@ -1181,7 +1182,7 @@ PHP_FUNCTION(locale_get_all_variants) efree( variant ); } } - + } /* }}} */ @@ -1220,11 +1221,11 @@ static int strToMatch(const char* str ,char *retstr) /* }}} */ /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize]) -* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm +* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */ /* }}} */ /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize]) -* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm +* Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm */ PHP_FUNCTION(locale_filter_matches) { @@ -1243,13 +1244,13 @@ PHP_FUNCTION(locale_filter_matches) char* cur_lang_tag = NULL; char* cur_loc_range = NULL; - zend_bool boolCanonical = 0; + zend_bool boolCanonical = 0; UErrorCode status = U_ZERO_ERROR; intl_error_reset( NULL TSRMLS_CC ); - + if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b", - &lang_tag, &lang_tag_len , &loc_range , &loc_range_len , + &lang_tag, &lang_tag_len , &loc_range , &loc_range_len , &boolCanonical) == FAILURE) { intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, @@ -1270,7 +1271,7 @@ PHP_FUNCTION(locale_filter_matches) /* canonicalize loc_range */ can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0); if( result ==0) { - intl_error_set( NULL, status, + intl_error_set( NULL, status, "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC ); RETURN_FALSE; } @@ -1278,7 +1279,7 @@ PHP_FUNCTION(locale_filter_matches) /* canonicalize lang_tag */ can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result , 0); if( result ==0) { - intl_error_set( NULL, status, + intl_error_set( NULL, status, "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC ); RETURN_FALSE; } @@ -1306,11 +1307,11 @@ PHP_FUNCTION(locale_filter_matches) /* check if prefix */ token = strstr( cur_lang_tag , cur_loc_range ); - + if( token && (token==cur_lang_tag) ){ /* check if the char. after match is SEPARATOR */ chrcheck = token + (strlen(cur_loc_range)); - if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ + if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ if( cur_lang_tag){ efree( cur_lang_tag ); } @@ -1346,7 +1347,7 @@ PHP_FUNCTION(locale_filter_matches) else{ /* Convert to lower case for case-insensitive comparison */ cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1); - + result = strToMatch( lang_tag , cur_lang_tag); if( result == 0) { efree( cur_lang_tag ); @@ -1362,11 +1363,11 @@ PHP_FUNCTION(locale_filter_matches) /* check if prefix */ token = strstr( cur_lang_tag , cur_loc_range ); - + if( token && (token==cur_lang_tag) ){ /* check if the char. after match is SEPARATOR */ chrcheck = token + (strlen(cur_loc_range)); - if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ + if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ if( cur_lang_tag){ efree( cur_lang_tag ); } @@ -1393,7 +1394,7 @@ PHP_FUNCTION(locale_filter_matches) static void array_cleanup( char* arr[] , int arr_size) { int i=0; - for( i=0; i< arr_size; i++ ){ + for( i=0; i< arr_size; i++ ){ if( arr[i*2] ){ efree( arr[i*2]); } @@ -1403,7 +1404,7 @@ static void array_cleanup( char* arr[] , int arr_size) #define LOOKUP_CLEAN_RETURN(value) array_cleanup(cur_arr, cur_arr_len); return (value) /* {{{ -* returns the lookup result to lookup_loc_range_src_php +* returns the lookup result to lookup_loc_range_src_php * internal function */ static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize TSRMLS_DC) @@ -1427,7 +1428,7 @@ static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int ca for(zend_hash_internal_pointer_reset(hash_arr); zend_hash_has_more_elements(hash_arr) == SUCCESS; zend_hash_move_forward(hash_arr)) { - + if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) { /* Should never actually fail since the key is known to exist.*/ continue; @@ -1436,7 +1437,7 @@ static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int ca /* element value is not a string */ intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC); LOOKUP_CLEAN_RETURN(NULL); - } + } cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value)); result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]); if(result == 0) { @@ -1444,12 +1445,12 @@ static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int ca LOOKUP_CLEAN_RETURN(NULL); } cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value); - cur_arr_len++ ; + cur_arr_len++ ; } /* end of for */ /* Canonicalize array elements */ if(canonicalize) { - for(i=0; i 0) { - for(i=0; i< cur_arr_len; i++){ - if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) { + for(i=0; i< cur_arr_len; i++){ + if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) { /* Match found */ return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]); efree(cur_loc_range); @@ -1515,14 +1516,14 @@ static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int ca } /* }}} */ -/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]]) +/* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]]) * Searchs the items in $langtag for the best match to the language -* range +* range */ /* }}} */ /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]]) * Searchs the items in $langtag for the best match to the language -* range +* range */ PHP_FUNCTION(locale_lookup) { @@ -1552,8 +1553,8 @@ PHP_FUNCTION(locale_lookup) if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) { RETURN_EMPTY_STRING(); - } - + } + result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC); if(result == NULL || result[0] == '\0') { if( fallback_loc ) { @@ -1590,10 +1591,10 @@ PHP_FUNCTION(locale_accept_from_http) "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC ); RETURN_FALSE; } - + available = ures_openAvailableLocales(NULL, &status); INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list"); - len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN, + len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN, &outResult, http_accept, available, &status); uenum_close(available); INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale"); diff --git a/ext/intl/tests/bug72241.phpt b/ext/intl/tests/bug72241.phpt new file mode 100644 index 0000000000..397e1e7834 --- /dev/null +++ b/ext/intl/tests/bug72241.phpt @@ -0,0 +1,14 @@ +--TEST-- +Bug #72241: get_icu_value_internal out-of-bounds read +--SKIPIF-- + +--FILE-- +