1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
|
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "components/autofill/core/browser/form_processing/label_processing_util.h"
#include "base/ranges/algorithm.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"
namespace autofill {
// Convenience alias for a list of field labels, each represented as a 16-bit
// string piece.
using LabelPieces = std::vector<base::StringPiece16>;
// Upper bound on the number of consecutive fields among which a single label
// may be split.
constexpr int kMaxNumberOfFieldsToShareALabel = 3;
// Labels whose length exceeds this value are never split among fields.
constexpr int kMaxLengthOfShareableLabel = 40;
// Attempts to derive one label per field for groups of adjacent fields in
// which only the first field carries a label and the following fields have
// empty labels. The leading label is split into components, first by the
// single-character separators '/', ',', '&' and '-', and then, one after the
// other, by the separator words " and ", " und ", " et " and " y ". If a split
// yields exactly one component per field of the group, the components are
// assigned to the fields in order.
//
// Returns a vector of strings aligned with |labels| if at least one label was
// split this way, and base::nullopt if no label was split.
base::Optional<std::vector<base::string16>> GetParseableLabels(
    const LabelPieces& labels) {
  // Working copy of the input; entries are overwritten with label components
  // whenever a group of fields is found to share a label.
  LabelPieces parseable_labels = labels;
  bool any_label_split = false;

  const size_t num_labels = labels.size();

  // |group_begin| is the index of the field whose label may be shared with the
  // fields that directly follow it. It is advanced inside the loop body.
  for (size_t group_begin = 0; group_begin < num_labels;) {
    const auto& candidate = labels.at(group_begin);

    // An empty label, or one longer than |kMaxLengthOfShareableLabel|, is
    // never shared with subsequent fields.
    if (candidate.empty() || candidate.size() > kMaxLengthOfShareableLabel) {
      ++group_begin;
      continue;
    }

    // Skip over the directly following fields with empty labels. Afterwards
    // |group_end| is either the index of the next field with a non-empty label
    // or the first out-of-bounds index.
    size_t group_end = group_begin + 1;
    while (group_end < num_labels && labels.at(group_end).empty())
      ++group_end;

    // Number of fields in the group, including the labeled one.
    const size_t group_size = group_end - group_begin;

    // Remember where the group starts and move on to the next non-empty label
    // right away, so that every exit below resumes at the correct position.
    const size_t first_field = group_begin;
    group_begin = group_end;

    // A group of one has nothing to share, and overly large groups are not
    // eligible for sharing.
    if (group_size == 1 || group_size > kMaxNumberOfFieldsToShareALabel)
      continue;

    // First attempt: split on single-character separators.
    LabelPieces components = base::SplitStringPiece(
        candidate, base::ASCIIToUTF16("/,&-"), base::TRIM_WHITESPACE,
        base::SPLIT_WANT_NONEMPTY);

    // Further attempts: as long as the component count does not match the
    // group size, retry with the next common separating word.
    for (const char* separator_word : {" and ", " und ", " et ", " y "}) {
      if (components.size() == group_size)
        break;
      components = base::SplitStringPieceUsingSubstr(
          candidate, base::ASCIIToUTF16(separator_word), base::TRIM_WHITESPACE,
          base::SPLIT_WANT_NONEMPTY);
    }

    // No split produced one component per field; leave this group untouched.
    if (components.size() != group_size)
      continue;

    // Hand out the components to the fields of the group in order.
    size_t target_field = first_field;
    for (const auto& component : components)
      parseable_labels[target_field++] = component;
    any_label_split = true;
  }

  if (!any_label_split) {
    return base::nullopt;
  }

  // Convert the string pieces into owning strings for memory safety.
  std::vector<base::string16> result;
  result.reserve(parseable_labels.size());
  for (auto& piece : parseable_labels)
    result.push_back(base::string16(piece));
  return base::make_optional(std::move(result));
}
} // namespace autofill
|