summaryrefslogtreecommitdiff
path: root/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc')
-rw-r--r--chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc129
1 files changed, 129 insertions, 0 deletions
diff --git a/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
new file mode 100644
index 00000000000..e5412a7bd4e
--- /dev/null
+++ b/chromium/chrome/renderer/safe_browsing/phishing_url_feature_extractor_unittest.cc
@@ -0,0 +1,129 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/renderer/safe_browsing/phishing_url_feature_extractor.h"
+
+#include <string>
+#include <vector>
+#include "chrome/renderer/safe_browsing/features.h"
+#include "chrome/renderer/safe_browsing/test_utils.h"
+#include "testing/gmock/include/gmock/gmock.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "url/gurl.h"
+
+using ::testing::ElementsAre;
+
+namespace safe_browsing {
+
+// Test fixture for PhishingUrlFeatureExtractor. It owns one extractor
+// instance and exposes the extractor's static token-splitting helper to the
+// TEST_F bodies below.
+class PhishingUrlFeatureExtractorTest : public ::testing::Test {
+ protected:
+  // Thin pass-through to
+  // PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens: |input|
+  // is the string to split and |out_tokens| receives the result.
+  // NOTE(review): presumably this wrapper exists because the underlying
+  // method is not publicly accessible -- confirm against the header.
+  void SplitStringIntoLongAlphanumTokens(
+      const std::string& input,
+      std::vector<std::string>* out_tokens) {
+    PhishingUrlFeatureExtractor::SplitStringIntoLongAlphanumTokens(
+        input, out_tokens);
+  }
+
+  // The extractor under test, used by the tests of this fixture.
+  PhishingUrlFeatureExtractor extractor_;
+};
+
+// Exercises ExtractFeatures() on a series of URLs. For the URLs that are
+// expected to succeed, the produced FeatureMap is compared against a
+// hand-built expected map; for the remaining URLs only the false return value
+// is checked.
+TEST_F(PhishingUrlFeatureExtractorTest, ExtractFeatures) {
+  FeatureMap expected;
+  FeatureMap actual;
+
+  // Host is an IP address; the path contributes "mydocuments", "file" and
+  // "html" (the expected map has no token for the single-letter "a").
+  ASSERT_TRUE(extractor_.ExtractFeatures(
+      GURL("http://123.0.0.1/mydocuments/a.file.html"), &actual));
+  expected.AddBooleanFeature(features::kUrlHostIsIpAddress);
+  expected.AddBooleanFeature(features::kUrlPathToken +
+                             std::string("mydocuments"));
+  expected.AddBooleanFeature(features::kUrlPathToken + std::string("file"));
+  expected.AddBooleanFeature(features::kUrlPathToken + std::string("html"));
+  ExpectFeatureMapsAreEqual(actual, expected);
+
+  // Multi-part TLD with repeated host and path components. The expected map
+  // lists "www" and "sports" once each and contains nothing taken from the
+  // query string.
+  actual.Clear();
+  ASSERT_TRUE(extractor_.ExtractFeatures(
+      GURL("http://www.www.cnn.co.uk/sports/sports/index.html?shouldnotappear"),
+      &actual));
+  expected.Clear();
+  expected.AddBooleanFeature(features::kUrlTldToken + std::string("co.uk"));
+  expected.AddBooleanFeature(features::kUrlDomainToken + std::string("cnn"));
+  expected.AddBooleanFeature(features::kUrlOtherHostToken +
+                             std::string("www"));
+  expected.AddBooleanFeature(features::kUrlNumOtherHostTokensGTOne);
+  expected.AddBooleanFeature(features::kUrlPathToken + std::string("sports"));
+  expected.AddBooleanFeature(features::kUrlPathToken + std::string("index"));
+  expected.AddBooleanFeature(features::kUrlPathToken + std::string("html"));
+  ExpectFeatureMapsAreEqual(actual, expected);
+
+  // Bare domain with an empty path: only TLD and domain features are
+  // expected.
+  actual.Clear();
+  ASSERT_TRUE(
+      extractor_.ExtractFeatures(GURL("http://justadomain.com/"), &actual));
+  expected.Clear();
+  expected.AddBooleanFeature(features::kUrlTldToken + std::string("com"));
+  expected.AddBooleanFeature(features::kUrlDomainToken +
+                             std::string("justadomain"));
+  ExpectFeatureMapsAreEqual(actual, expected);
+
+  // URL carrying a "#abc" ref: the expected map holds nothing derived from
+  // the ref.
+  actual.Clear();
+  ASSERT_TRUE(
+      extractor_.ExtractFeatures(GURL("http://witharef.com/#abc"), &actual));
+  expected.Clear();
+  expected.AddBooleanFeature(features::kUrlTldToken + std::string("com"));
+  expected.AddBooleanFeature(features::kUrlDomainToken +
+                             std::string("witharef"));
+  ExpectFeatureMapsAreEqual(actual, expected);
+
+  // Host with leading, repeated and trailing dots: the expected map is the
+  // same as it would be for a plain "www.lotsodots.com" style host.
+  actual.Clear();
+  ASSERT_TRUE(extractor_.ExtractFeatures(
+      GURL("http://...www..lotsodots....com./"), &actual));
+  expected.Clear();
+  expected.AddBooleanFeature(features::kUrlTldToken + std::string("com"));
+  expected.AddBooleanFeature(features::kUrlDomainToken +
+                             std::string("lotsodots"));
+  expected.AddBooleanFeature(features::kUrlOtherHostToken +
+                             std::string("www"));
+  ExpectFeatureMapsAreEqual(actual, expected);
+
+  // The remaining URLs are all expected to make ExtractFeatures() return
+  // false. |actual| is deliberately not cleared here; only the return value
+  // is checked.
+  EXPECT_FALSE(
+      extractor_.ExtractFeatures(GURL("http://unrecognized.tld/"), &actual));
+  EXPECT_FALSE(extractor_.ExtractFeatures(GURL("http://com/123"), &actual));
+  EXPECT_FALSE(extractor_.ExtractFeatures(GURL("http://.co.uk/"), &actual));
+  EXPECT_FALSE(
+      extractor_.ExtractFeatures(GURL("file:///nohost.txt"), &actual));
+  EXPECT_FALSE(extractor_.ExtractFeatures(GURL("not:valid:at:all"), &actual));
+}
+
+// Checks the token splitter on two inputs: one mixing long and short
+// alphanumeric runs, and one for which no tokens are expected at all.
+TEST_F(PhishingUrlFeatureExtractorTest, SplitStringIntoLongAlphanumTokens) {
+  // The expected result keeps "This", "pretty", "unusual", "path" and
+  // "indeed" in input order, and omits the short pieces "is" and "a".
+  std::vector<std::string> kept_tokens;
+  SplitStringIntoLongAlphanumTokens("This.is/a_pretty\\unusual-!path,indeed",
+                                    &kept_tokens);
+  EXPECT_THAT(kept_tokens,
+              ElementsAre("This", "pretty", "unusual", "path", "indeed"));
+
+  // An input chopped into very short pieces by many different separators is
+  // expected to yield no tokens.
+  std::vector<std::string> no_tokens;
+  SplitStringIntoLongAlphanumTokens("...i-am_re/al&ly\\b,r,o|k=e:n///up%20",
+                                    &no_tokens);
+  EXPECT_THAT(no_tokens, ElementsAre());
+}
+
+} // namespace safe_browsing