summaryrefslogtreecommitdiff
path: root/chromium/services/data_decoder/xml_parser.cc
blob: d2f81d7823a5e40a0262409395786081db6426d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "services/data_decoder/xml_parser.h"

#include <map>
#include <utility>

#include "base/strings/string_util.h"
#include "base/values.h"
#include "third_party/libxml/chromium/libxml_utils.h"

namespace data_decoder {

using AttributeMap = std::map<std::string, std::string>;
using NamespaceMap = std::map<std::string, std::string>;

namespace {

void ReportError(XmlParser::ParseCallback callback, const std::string& error) {
  std::move(callback).Run(/*result=*/base::nullopt, base::make_optional(error));
}

enum class TextNodeType { kText, kCData };

// Returns false if the current node in |xml_reader| is not text or CData.
// Otherwise returns true and sets |text| to the text/CData of the current node
// and |node_type| to kText or kCData.
bool GetTextFromNode(XmlReader* xml_reader,
                     std::string* text,
                     TextNodeType* node_type) {
  if (xml_reader->GetTextIfTextElement(text)) {
    *node_type = TextNodeType::kText;
    return true;
  }
  if (xml_reader->GetTextIfCDataElement(text)) {
    *node_type = TextNodeType::kCData;
    return true;
  }
  return false;
}

base::Value CreateTextNode(const std::string& text, TextNodeType node_type) {
  base::Value element(base::Value::Type::DICTIONARY);
  element.SetKey(mojom::XmlParser::kTypeKey,
                 base::Value(node_type == TextNodeType::kText
                                 ? mojom::XmlParser::kTextNodeType
                                 : mojom::XmlParser::kCDataNodeType));
  element.SetKey(mojom::XmlParser::kTextKey, base::Value(text));
  return element;
}

// Creates and returns new element node with the tag name |name|.
base::Value CreateNewElement(const std::string& name) {
  base::Value element(base::Value::Type::DICTIONARY);
  element.SetKey(mojom::XmlParser::kTypeKey,
                 base::Value(mojom::XmlParser::kElementType));
  element.SetKey(mojom::XmlParser::kTagKey, base::Value(name));
  return element;
}

// Adds |child| as a child of |element|, creating the children list if
// necessary. Returns a ponter to |child|.
base::Value* AddChildToElement(base::Value* element, base::Value child) {
  DCHECK(element->is_dict());
  base::Value* children = element->FindKey(mojom::XmlParser::kChildrenKey);
  DCHECK(!children || children->is_list());
  if (!children)
    children = element->SetKey(mojom::XmlParser::kChildrenKey,
                               base::Value(base::Value::Type::LIST));
  children->Append(std::move(child));
  return &children->GetList().back();
}

void PopulateNamespaces(base::Value* node_value, XmlReader* xml_reader) {
  DCHECK(node_value->is_dict());
  NamespaceMap namespaces;
  if (!xml_reader->GetAllDeclaredNamespaces(&namespaces) || namespaces.empty())
    return;

  base::Value namespace_dict(base::Value::Type::DICTIONARY);
  for (auto ns : namespaces)
    namespace_dict.SetKey(ns.first, base::Value(ns.second));

  node_value->SetKey(mojom::XmlParser::kNamespacesKey,
                     std::move(namespace_dict));
}

void PopulateAttributes(base::Value* node_value, XmlReader* xml_reader) {
  DCHECK(node_value->is_dict());
  AttributeMap attributes;
  if (!xml_reader->GetAllNodeAttributes(&attributes) || attributes.empty())
    return;

  base::Value attribute_dict(base::Value::Type::DICTIONARY);
  for (auto attribute : attributes)
    attribute_dict.SetKey(attribute.first, base::Value(attribute.second));

  node_value->SetKey(mojom::XmlParser::kAttributesKey,
                     std::move(attribute_dict));
}

}  // namespace

XmlParser::XmlParser(
    std::unique_ptr<service_manager::ServiceContextRef> service_ref)
    : service_ref_(std::move(service_ref)) {}

XmlParser::~XmlParser() = default;

void XmlParser::Parse(const std::string& xml, ParseCallback callback) {
  XmlReader xml_reader;
  if (!xml_reader.Load(xml)) {
    ReportError(std::move(callback), "Invalid XML: failed to load");
    return;
  }

  base::Value root_element;
  std::vector<base::Value*> element_stack;
  while (xml_reader.Read()) {
    if (xml_reader.IsClosingElement()) {
      if (element_stack.empty()) {
        ReportError(std::move(callback), "Invalid XML: unbalanced elements");
        return;
      }
      element_stack.pop_back();
      continue;
    }

    std::string text;
    TextNodeType text_node_type = TextNodeType::kText;
    base::Value* current_element =
        element_stack.empty() ? nullptr : element_stack.back();
    bool push_new_node_to_stack = false;
    base::Value new_element;
    if (GetTextFromNode(&xml_reader, &text, &text_node_type)) {
      if (!base::IsStringUTF8(text)) {
        ReportError(std::move(callback), "Invalid XML: invalid UTF8 text.");
        return;
      }
      new_element = CreateTextNode(text, text_node_type);
    } else if (xml_reader.IsElement()) {
      new_element = CreateNewElement(xml_reader.NodeFullName());
      PopulateNamespaces(&new_element, &xml_reader);
      PopulateAttributes(&new_element, &xml_reader);
      // Self-closing (empty) element have no close tag (or children); don't
      // push them on the element stack.
      push_new_node_to_stack = !xml_reader.IsEmptyElement();
    } else {
      // Ignore all other node types (spaces, comments, processing instructions,
      // DTDs...).
      continue;
    }

    base::Value* new_element_ptr;
    if (current_element) {
      new_element_ptr =
          AddChildToElement(current_element, std::move(new_element));
    } else {
      // First element we are parsing, it becomes the root element.
      DCHECK(xml_reader.IsElement());
      DCHECK(root_element.is_none());
      root_element = std::move(new_element);
      new_element_ptr = &root_element;
    }
    if (push_new_node_to_stack)
      element_stack.push_back(new_element_ptr);
  }

  if (!element_stack.empty()) {
    ReportError(std::move(callback), "Invalid XML: unbalanced elements");
    return;
  }
  base::DictionaryValue* dictionary = nullptr;
  root_element.GetAsDictionary(&dictionary);
  if (!dictionary || dictionary->empty()) {
    ReportError(std::move(callback), "Invalid XML: bad content");
    return;
  }
  std::move(callback).Run(base::make_optional(std::move(root_element)),
                          base::Optional<std::string>());
}

}  // namespace data_decoder