summaryrefslogtreecommitdiff
path: root/ext
diff options
context:
space:
mode:
authorAnatol Belski <ab@php.net>2017-10-20 19:14:22 +0200
committerAnatol Belski <ab@php.net>2017-10-20 19:23:32 +0200
commitf95063647ccebfde5e8e39bc3349e63f19662395 (patch)
tree69dd4611bdc69ca692025c9711931aacc60cc24b /ext
parent2198d38cbc12beeaf00be688cf33a5a6697ea20c (diff)
downloadphp-git-f95063647ccebfde5e8e39bc3349e63f19662395.tar.gz
Fixed bug #73655 Spoofchecker::isSuspicious behavior change due to upstream changes
There are significant changes in the spoof checking reflecting http://www.unicode.org/reports/tr39/tr39-15.html and relying on the restriction levels. ICU 58+ removes WSC and MSC handling and otherwise undergoes big changes in both code and data areas. Keep up with the basic points for now, as we need to move forward and provide an acceptable experience to PHP users linking to a newer ICU. Most distros currently don't provide ICU > 57.1, though. We will certainly need to keep up with further aspects of the BC break in ICU 58+.
Diffstat (limited to 'ext')
-rw-r--r--ext/intl/spoofchecker/spoofchecker_class.h2
-rw-r--r--ext/intl/spoofchecker/spoofchecker_create.c13
-rw-r--r--ext/intl/tests/spoofchecker_006.phpt26
3 files changed, 41 insertions, 0 deletions
diff --git a/ext/intl/spoofchecker/spoofchecker_class.h b/ext/intl/spoofchecker/spoofchecker_class.h
index 7c5864b82f..7a95f315dc 100644
--- a/ext/intl/spoofchecker/spoofchecker_class.h
+++ b/ext/intl/spoofchecker/spoofchecker_class.h
@@ -78,4 +78,6 @@ extern zend_class_entry *Spoofchecker_ce_ptr;
RETURN_FALSE; \
} \
+#define SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL USPOOF_MODERATELY_RESTRICTIVE
+
#endif // #ifndef SPOOFCHECKER_CLASS_H
diff --git a/ext/intl/spoofchecker/spoofchecker_create.c b/ext/intl/spoofchecker/spoofchecker_create.c
index fbe7cbae1d..1333a0f205 100644
--- a/ext/intl/spoofchecker/spoofchecker_create.c
+++ b/ext/intl/spoofchecker/spoofchecker_create.c
@@ -43,12 +43,25 @@ PHP_METHOD(Spoofchecker, __construct)
co->uspoof = uspoof_open(SPOOFCHECKER_ERROR_CODE_P(co));
INTL_METHOD_CHECK_STATUS(co, "spoofchecker: unable to open ICU Spoof Checker");
+#if U_ICU_VERSION_MAJOR_NUM >= 58
+ /* TODO save it into the object for further suspicion check comparison. */
+ /* ICU 58 removes WSC and MSC handling. However, there are restriction
+ levels as defined in
+ http://www.unicode.org/reports/tr39/tr39-15.html#Restriction_Level_Detection
+ and the default is highly restrictive. However, the moderately restrictive
+ level is what seems to correspond to the setting below applicable to
+ ICU < 58. In the future, we might want to utilize the uspoof_check2 APIs once
+ they become stable, to use the extended check result APIs. Subsequent changes
+ in the Unicode security algorithms are to be watched. */
+ uspoof_setRestrictionLevel(co->uspoof, SPOOFCHECKER_DEFAULT_RESTRICTION_LEVEL);
+#else
/* Single-script enforcement is on by default. This fails for languages
like Japanese that legally use multiple scripts within a single word,
so we turn it off.
*/
checks = uspoof_getChecks(co->uspoof, SPOOFCHECKER_ERROR_CODE_P(co));
uspoof_setChecks(co->uspoof, checks & ~USPOOF_SINGLE_SCRIPT, SPOOFCHECKER_ERROR_CODE_P(co));
+#endif
zend_restore_error_handling(&error_handling);
}
/* }}} */
diff --git a/ext/intl/tests/spoofchecker_006.phpt b/ext/intl/tests/spoofchecker_006.phpt
new file mode 100644
index 0000000000..038a255419
--- /dev/null
+++ b/ext/intl/tests/spoofchecker_006.phpt
@@ -0,0 +1,26 @@
+--TEST--
+spoofchecker suspicious character checker
+--SKIPIF--
+<?php if (!extension_loaded('intl') || !class_exists("Spoofchecker")) die('skip intl extension with Spoofchecker required'); ?>
+<?php if (version_compare(INTL_ICU_VERSION, '58.1') < 0) die('skip for ICU >= 58.1'); ?>
+--FILE--
+<?php
+// Expectations below apply to the restriction-level based checks used when built against ICU >= 58.
+echo "paypal with Cyrillic spoof characters\n";
+$x = new Spoofchecker();
+var_dump($x->isSuspicious("http://www.payp\u{0430}l.com"));
+var_dump($x->isSuspicious("\u{041F}aypal.com"));
+// NOTE(review): the ISO 639-1 language code for Greek is "el"; confirm that "gr_GR" below is intended.
+echo "certain all-uppercase Latin sequences can be spoof of Greek\n";
+$x = new Spoofchecker();
+$x->setAllowedLocales("gr_GR");
+var_dump($x->isSuspicious("NAPKIN PEZ"));
+var_dump($x->isSuspicious("napkin pez"));
+?>
+--EXPECTF--
+paypal with Cyrillic spoof characters
+bool(true)
+bool(true)
+certain all-uppercase Latin sequences can be spoof of Greek
+bool(true)
+bool(true)