summaryrefslogtreecommitdiff
path: root/chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc')
-rw-r--r--chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc100
1 files changed, 76 insertions, 24 deletions
diff --git a/chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc b/chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
index ba400f32f08..82aab62d170 100644
--- a/chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
+++ b/chromium/components/autofill/core/browser/data_model/autofill_structured_address_regex_provider.cc
@@ -160,12 +160,16 @@ const char kOptionalLastNamePrefixRe[] =
// Regular expression to match the affixes that indicate the floor an
// apartment is located in.
const char kFloorAffixRe[] =
- "((°|º|\\.|\\s)*"
- "(floor|flur|fl|og|obergeschoss|ug|untergeschoss|geschoss|andar)"
+ "((°|º|\\.|\\s|-)*"
+ "(floor|flur|fl|og|obergeschoss|ug|untergeschoss|geschoss|andar|piso|º)"
"(\\.|\\s|-)*)";
+// Prefix that indicates an apartment number.
const char kApartmentNumberPrefix[] =
- "((apt|apartment|wohnung|apto)(\\.|\\s|-)*)";
+ "((apt|apartment|wohnung|apto|-)(\\.|\\s|-)*)";
+
+// Suffix that inficates an apartment number.
+const char kApartmentNumberSuffix[] = "(\\.|\\s|-)*(ª)";
// Regular expression to match the prefixes that indicate a house number.
const char kHouseNumberOptionalPrefixRe[] = "(((no|°|º|number)(\\.|-|\\s)*)?)";
@@ -257,8 +261,8 @@ std::string ParseOnlyLastNameExpression() {
// Returns an expression to parse a name that consists of a first, middle and
// last name with an optional honorific prefix. The full name is parsed into
-// |NAME_FULL|. The name can start with an honorific prefix that is parsed
-// into |NAME_HONORIFIC_PREFIX|. The last token is parsed into |NAME_LAST|.
+// |NAME_FULL|. The name can start with an honorific prefix that is ignored.
+// The last token is parsed into |NAME_LAST|.
// This token may be preceded by a last name prefix like "Mac" or
// "von" that is included in |NAME_LAST|. If the strings contains any
// remaining tokens, the first token is parsed into
@@ -266,8 +270,8 @@ std::string ParseOnlyLastNameExpression() {
std::string ParseFirstMiddleLastNameExpression() {
return CaptureTypeWithPattern(
NAME_FULL,
- {CaptureTypeWithPattern(NAME_HONORIFIC_PREFIX, kHonorificPrefixRe,
- CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ {NoCapturePattern(kHonorificPrefixRe,
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
CaptureTypeWithPattern(NAME_FIRST, kSingleWordRe,
CaptureOptions{.quantifier = MATCH_OPTIONAL}),
CaptureTypeWithPattern(
@@ -281,15 +285,15 @@ std::string ParseFirstMiddleLastNameExpression() {
// Returns an expression to parse a name that starts with the last name,
// followed by a comma, and than the first and middle names.
// The full name is parsed into |NAME_FULL|. The name can start with an optional
-// honorific prefix that is parsed into |HONORIFIC_PREFIX|, follow by a single
+// honorific prefix that is ignored, followed by a single
// token that is parsed into |LAST_NAME|. The |LAST_NAME| must be preceded by a
// comma with optional spaces. The next token is parsed into |NAME_FIRST| and
// all remaining tokens are parsed into |NAME_MIDDLE|.
std::string ParseLastCommaFirstMiddleExpression() {
return CaptureTypeWithPattern(
NAME_FULL,
- {CaptureTypeWithPattern(NAME_HONORIFIC_PREFIX, kHonorificPrefixRe,
- CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ {NoCapturePattern(kHonorificPrefixRe,
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
CaptureTypeWithPattern(NAME_LAST,
{kOptionalLastNamePrefixRe, kSingleWordRe},
{.separator = "\\s*,\\s*"}),
@@ -321,13 +325,13 @@ std::string ParseHispanicLastNameExpression() {
}
// Returns an expression to parse a full Hispanic/Latinx name that
-// contains an optional honorific prefix, a first name, and a last name as
-// specified by |ParseHispanicLastNameExpression()|.
+// contains an optional honorific prefix which is ignored, a first name, and a
+// last name as specified by |ParseHispanicLastNameExpression()|.
std::string ParseHispanicFullNameExpression() {
return CaptureTypeWithPattern(
NAME_FULL,
- {CaptureTypeWithPattern(NAME_HONORIFIC_PREFIX, kHonorificPrefixRe,
- CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ {NoCapturePattern(kHonorificPrefixRe,
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
CaptureTypeWithPattern(
NAME_FIRST, kMultipleLazyWordsRe,
CaptureOptions{.quantifier = MATCH_LAZY_OPTIONAL}),
@@ -354,19 +358,50 @@ std::string ParseStreetNameHouseNumberExpression() {
{CaptureTypeWithPattern(ADDRESS_HOME_STREET_AND_DEPENDENT_STREET_NAME,
CaptureTypeWithPattern(ADDRESS_HOME_STREET_NAME,
kMultipleLazyWordsRe),
- {.separator = ""}),
- CaptureTypeWithPrefixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
- kHouseNumberOptionalPrefixRe,
- "(?:\\d+\\w?)"),
+ CaptureOptions{.separator = ""}),
+ CaptureTypeWithAffixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
+ kHouseNumberOptionalPrefixRe,
+ "(?:\\d+\\w?)", "(th\\.|\\.)?"),
CaptureTypeWithPattern(
ADDRESS_HOME_SUBPREMISE,
{
CaptureTypeWithPrefixedPattern(
- ADDRESS_HOME_FLOOR, kFloorAffixRe, "(?:(\\d{0,3}\\w?))",
+ ADDRESS_HOME_FLOOR, kFloorAffixRe, "(?:(\\d{1,3}\\w?|\\w))",
CaptureOptions{.quantifier = MATCH_OPTIONAL}),
CaptureTypeWithPrefixedPattern(
ADDRESS_HOME_APT_NUM, kApartmentNumberPrefix,
- "(?:(\\d{0,3}\\w?))",
+ "(?:(\\d{1,3}\\w?|\\w))",
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ },
+ CaptureOptions{.quantifier = MATCH_OPTIONAL})});
+}
+
+// Returns an expression to parse a street address into the street name, the
+// house number and the subpremise. The latter is parsed into the floor and
+// apartment number. The expression is applicable, if the street name comes
+// before the house number, followed by the floor and the apartment.
+// Both the floor and the apartment must be indicated by a suffix.
+// Example: Calla 1, 2º, 3ª
+// Where 2 is the floor and 3 the apartment number.
+std::string ParseStreetNameHouseNumberSuffixedFloorAndAppartmentExpression() {
+ return CaptureTypeWithPattern(
+ ADDRESS_HOME_STREET_ADDRESS,
+ {CaptureTypeWithPattern(ADDRESS_HOME_STREET_AND_DEPENDENT_STREET_NAME,
+ CaptureTypeWithPattern(ADDRESS_HOME_STREET_NAME,
+ kMultipleLazyWordsRe),
+ CaptureOptions{.separator = ""}),
+ CaptureTypeWithAffixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
+ kHouseNumberOptionalPrefixRe,
+ "(?:\\d+\\w?)", "(th\\.|\\.)?"),
+ CaptureTypeWithPattern(
+ ADDRESS_HOME_SUBPREMISE,
+ {
+ CaptureTypeWithSuffixedPattern(
+ ADDRESS_HOME_FLOOR, "(?:(\\d{1,3}\\w?|\\w))", kFloorAffixRe,
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ CaptureTypeWithAffixedPattern(
+ ADDRESS_HOME_APT_NUM, "(-\\s*)?", "(?:(\\d{1,3}\\w?|\\w))",
+ kApartmentNumberSuffix,
CaptureOptions{.quantifier = MATCH_OPTIONAL}),
},
CaptureOptions{.quantifier = MATCH_OPTIONAL})});
@@ -388,9 +423,9 @@ std::string ParseStreetNameHouseNumberExpressionSuffixedFloor() {
CaptureTypeWithPattern(ADDRESS_HOME_STREET_NAME,
kMultipleLazyWordsRe),
{.separator = ""}),
- CaptureTypeWithPrefixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
- kHouseNumberOptionalPrefixRe,
- "(?:\\d+\\w?)"),
+ CaptureTypeWithAffixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
+ kHouseNumberOptionalPrefixRe,
+ "(?:\\d+\\w?)", "(th\\.|\\.)?"),
CaptureTypeWithPattern(
ADDRESS_HOME_SUBPREMISE,
{
@@ -414,7 +449,9 @@ std::string ParseStreetNameHouseNumberExpressionSuffixedFloor() {
std::string ParseHouseNumberStreetNameExpression() {
return CaptureTypeWithPattern(
ADDRESS_HOME_STREET_ADDRESS,
- {CaptureTypeWithPattern(ADDRESS_HOME_HOUSE_NUMBER, "(?:\\d+\\w{0,3})"),
+ {CaptureTypeWithAffixedPattern(ADDRESS_HOME_HOUSE_NUMBER,
+ kHouseNumberOptionalPrefixRe,
+ "(?:\\d+\\w?)", "(th\\.|\\.)?"),
CaptureTypeWithPattern(ADDRESS_HOME_STREET_AND_DEPENDENT_STREET_NAME,
CaptureTypeWithPattern(ADDRESS_HOME_STREET_NAME,
kMultipleLazyWordsRe),
@@ -433,6 +470,17 @@ std::string ParseHouseNumberStreetNameExpression() {
CaptureOptions{.quantifier = MATCH_OPTIONAL})});
}
+// Returns a regular expression to parse a name with a honorific into the prefix
+// and the full name.
+std::string ParsePrefixedName() {
+ return CaptureTypeWithPattern(
+ NAME_FULL_WITH_HONORIFIC_PREFIX,
+ {CaptureTypeWithPattern(NAME_HONORIFIC_PREFIX, kHonorificPrefixRe,
+ CaptureOptions{.quantifier = MATCH_OPTIONAL}),
+ CaptureTypeWithPattern(NAME_FULL, ".+",
+ CaptureOptions{.quantifier = MATCH_REQUIRED})});
+}
+
} // namespace
StructuredAddressesRegExProvider::StructuredAddressesRegExProvider() = default;
@@ -481,8 +529,12 @@ std::string StructuredAddressesRegExProvider::GetPattern(
return ParseHouseNumberStreetNameExpression();
case RegEx::kParseStreetNameHouseNumberSuffixedFloor:
return ParseStreetNameHouseNumberExpressionSuffixedFloor();
+ case RegEx::kParseStreetNameHouseNumberSuffixedFloorAndAppartmentRe:
+ return ParseStreetNameHouseNumberSuffixedFloorAndAppartmentExpression();
case RegEx::kParseStreetNameHouseNumber:
return ParseStreetNameHouseNumberExpression();
+ case RegEx::kParsePrefixedName:
+ return ParsePrefixedName();
}
NOTREACHED();
}