diff options
author | Leigh <leigh@php.net> | 2016-07-06 12:35:13 +0100 |
---|---|---|
committer | Leigh <leigh@php.net> | 2016-07-06 12:35:13 +0100 |
commit | 06607993f69a8e67758c301776b5db9525783498 (patch) | |
tree | b135815872cc366438332ee9f7255cd2be345d7a /ext/standard/array.c | |
parent | b21de28bb70117d9bfe73efeb7d6bb5691b043e5 (diff) | |
download | php-git-06607993f69a8e67758c301776b5db9525783498.tar.gz |
Improve array_rand distribution
Diffstat (limited to 'ext/standard/array.c')
-rw-r--r-- | ext/standard/array.c | 84 |
1 files changed, 55 insertions, 29 deletions
diff --git a/ext/standard/array.c b/ext/standard/array.c index 2fc3f284ca..2cdb117832 100644 --- a/ext/standard/array.c +++ b/ext/standard/array.c @@ -79,6 +79,12 @@ #define INTERSECT_COMP_DATA_USER 1 #define INTERSECT_COMP_KEY_INTERNAL 0 #define INTERSECT_COMP_KEY_USER 1 + +// For array_rand even distribution +#define BITFIELD_BYTES(bits) ((bits + 7) >> 3) +#define BITFIELD_SETBIT(field, bit) field[bit >> 3] |= 1 << (bit & 7) +#define BITFIELD_BITSET(field, bit) ((field[bit >> 3] & (1 << (bit & 7))) != 0) + /* }}} */ ZEND_DECLARE_MODULE_GLOBALS(array) @@ -5033,9 +5039,12 @@ PHP_FUNCTION(array_rand) { zval *input; zend_long randval, num_req = 1; - int num_avail; zend_string *string_key; zend_ulong num_key; + int i; + int num_avail; + char *bitfield; + int negative_bitfield = 0; if (zend_parse_parameters(ZEND_NUM_ARGS(), "a|l", &input, &num_req) == FAILURE) { return; @@ -5043,46 +5052,63 @@ PHP_FUNCTION(array_rand) num_avail = zend_hash_num_elements(Z_ARRVAL_P(input)); - if (ZEND_NUM_ARGS() > 1) { - if (num_req <= 0 || num_req > num_avail) { - php_error_docref(NULL, E_WARNING, "Second argument has to be between 1 and the number of elements in the array"); - return; - } - } - - /* Make the return value an array only if we need to pass back more than one result. */ - if (num_req > 1) { - array_init_size(return_value, (uint32_t)num_req); + if (num_avail == 0) { + php_error_docref(NULL, E_WARNING, "Array is empty"); + return; } - /* We can't use zend_hash_index_find() because the array may have string keys or gaps. */ - ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { - if (!num_req) { - break; - } - - randval = php_rand(); + if (num_req == 1) { + randval = php_mt_rand_range(0, num_avail - 1); - if ((double) (randval / PHP_RAND_MAX) <= (double) num_req / (double) num_avail) { - /* If we are returning a single result, just do it. */ - if (Z_TYPE_P(return_value) != IS_ARRAY) { + ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { + if (randval-- == 0) { if (string_key) { RETURN_STR_COPY(string_key); } else { RETURN_LONG(num_key); } + } + } ZEND_HASH_FOREACH_END(); + } + + if (num_req <= 0 || num_req > num_avail) { + php_error_docref(NULL, E_WARNING, "Second argument has to be between 1 and the number of elements in the array"); + return; + } + + /* Make the return value an array only if we need to pass back more than one result. */ + array_init_size(return_value, (uint32_t)num_req); + if (num_req > (num_avail >> 1)) { + negative_bitfield = 1; + num_req = num_avail - num_req; + } + + bitfield = emalloc(BITFIELD_BYTES(num_avail)); + memset(bitfield, 0, BITFIELD_BYTES(num_avail)); + + i = num_req; + while (i) { + randval = php_mt_rand_range(0, num_avail - 1); + if (!BITFIELD_BITSET(bitfield, randval)) { + BITFIELD_SETBIT(bitfield, randval); + i--; + } + } + + /* We can't use zend_hash_index_find() because the array may have string keys or gaps. */ + i = 0; + ZEND_HASH_FOREACH_KEY(Z_ARRVAL_P(input), num_key, string_key) { + if (BITFIELD_BITSET(bitfield, i) ^ negative_bitfield) { + if (string_key) { + add_next_index_str(return_value, zend_string_copy(string_key)); } else { - /* Append the result to the return value. */ - if (string_key) { - add_next_index_str(return_value, zend_string_copy(string_key)); - } else { - add_next_index_long(return_value, num_key); - } + add_next_index_long(return_value, num_key); } - num_req--; } - num_avail--; + i++; } ZEND_HASH_FOREACH_END(); + + efree(bitfield); } /* }}} */ |