summaryrefslogtreecommitdiff
path: root/chromium/components/autofill/core/browser/form_processing/label_processing_util.cc
blob: 50e7e56a1b3cab5d7f331ad7fe660b1c6d881b1b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
// Copyright 2021 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "components/autofill/core/browser/form_processing/label_processing_util.h"

#include "base/ranges/algorithm.h"
#include "base/strings/string_split.h"
#include "base/strings/utf_string_conversions.h"

namespace autofill {

// Views of the labels of consecutive form fields, one entry per field. The
// entries are string pieces, i.e. they refer to character data held elsewhere.
using LabelPieces = std::vector<base::StringPiece16>;

// The maximum number of fields that can share a label. A label followed by so
// many empty labels that the run exceeds this number of fields is not split.
const int kMaxNumberOfFieldsToShareALabel = 3;
// The maximum length of a label that can be shared among fields, compared
// against the size() of the label's string piece. Longer labels are not split.
const int kMaxLengthOfShareableLabel = 40;

// Splits labels that are shared by a run of consecutive fields into one label
// per field.
//
// A label is treated as shared when it is non-empty, not longer than
// |kMaxLengthOfShareableLabel|, and directly followed by empty labels such
// that the run (the label itself plus the empty labels after it) spans between
// 2 and |kMaxNumberOfFieldsToShareALabel| fields. Such a label is split first
// by the single-character separators '/', ',', '&' and '-'. If that does not
// yield exactly one component per field of the run, it is split by each of the
// separating words " and ", " und ", " et " and " y " in turn. The first split
// that yields exactly one component per field wins, and its components are
// assigned to the fields of the run in order.
//
// |labels| holds the label of every field, in field order.
//
// Returns base::nullopt if no label could be split. Otherwise returns a vector
// of the same size as |labels| in which the fields of every split run carry
// their component and all remaining entries are copies of the input labels.
base::Optional<std::vector<base::string16>> GetParseableLabels(
    const LabelPieces& labels) {
  // The limits are declared as signed integers. Convert them once so that all
  // comparisons below are made between values of the same unsigned type.
  const size_t max_shareable_length =
      static_cast<size_t>(kMaxLengthOfShareableLabel);
  const size_t max_fields_per_label =
      static_cast<size_t>(kMaxNumberOfFieldsToShareALabel);

  // The set of single-character separators does not change between
  // iterations, so it is converted to UTF-16 only once.
  const base::string16 single_char_separators = base::ASCIIToUTF16("/,&-");

  // The resulting labels. This stays empty until the first label is split so
  // that no copy of |labels| is made when there is nothing to return.
  LabelPieces shared_labels;

  // Tracks if at least one shared label was found.
  bool shared_labels_found = false;

  // The index of the current field that may be eligible to share its label with
  // the subsequent fields.
  size_t label_index = 0;
  while (label_index < labels.size()) {
    const auto& label = labels.at(label_index);
    // If the label is empty or has a size that exceeds
    // |kMaxLengthOfShareableLabel| it can not be shared with subsequent fields.
    if (label.empty() || label.size() > max_shareable_length) {
      ++label_index;
      continue;
    }

    // Otherwise skip over the directly following fields with empty labels.
    size_t scan_index = label_index + 1;
    while (scan_index < labels.size() && labels.at(scan_index).empty()) {
      ++scan_index;
    }
    // After the loop, the |scan_index| points to the first subsequent field
    // that does not have an empty label or is the first out-of-bound index.

    // Calculate the number of fields that may share a label.
    const size_t fields_to_share_label = scan_index - label_index;

    // Remember the current index and advance |label_index| to continue with
    // the next non-empty field, whatever the outcome of the split below.
    const size_t shared_label_starting_index = label_index;
    label_index = scan_index;

    // Determine if there is the correct number of fields that may share a
    // label.
    if (fields_to_share_label == 1 ||
        fields_to_share_label > max_fields_per_label) {
      continue;
    }

    // Try to split the label by single character separators.
    LabelPieces label_components =
        base::SplitStringPiece(label, single_char_separators,
                               base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);

    // If the number of components does not match, try to split by common
    // separating words.
    if (label_components.size() != fields_to_share_label) {
      for (const char* word : {" and ", " und ", " et ", " y "}) {
        label_components = base::SplitStringPieceUsingSubstr(
            label, base::ASCIIToUTF16(word), base::TRIM_WHITESPACE,
            base::SPLIT_WANT_NONEMPTY);
        if (label_components.size() == fields_to_share_label)
          break;
      }
    }

    // Continue to the next field if the right number of components has not
    // been found.
    if (label_components.size() != fields_to_share_label)
      continue;

    // Start from a copy of the input labels on the first successful split.
    if (!shared_labels_found) {
      shared_labels = labels;
      shared_labels_found = true;
    }

    // Assign the label components to the fields of the run. The run ends
    // before |scan_index|, which is at most |labels.size()|, so every index
    // written here is in bounds.
    for (size_t i = 0; i < label_components.size(); ++i) {
      shared_labels[shared_label_starting_index + i] = label_components.at(i);
    }
  }

  if (!shared_labels_found) {
    return base::nullopt;
  }

  // Otherwise convert the shared label string pieces into strings for memory
  // safety.
  std::vector<base::string16> result;
  result.reserve(shared_labels.size());
  base::ranges::transform(shared_labels, std::back_inserter(result),
                          [](const auto& s) { return base::string16(s); });
  return base::make_optional(std::move(result));
}

}  // namespace autofill