summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChad Smith <chad.smith@canonical.com>2023-02-09 15:50:33 -0700
committerGitHub <noreply@github.com>2023-02-09 15:50:33 -0700
commit55686b977570a6de69bef51c1e1a9d452333995d (patch)
tree0b36881db29374d18ac7728b348fbb229147455f
parentb923a1c4fe740e9142d2c16b269d692a69919c0d (diff)
downloadcloud-init-git-55686b977570a6de69bef51c1e1a9d452333995d.tar.gz
cli: schema also validate vendordata*.
cloud-init schema --annotate now walks any valid user-data, vendor-data and vendor2-data to report whether any of these cloud-config files provide invalid or deprecated schema values. Also, fix a bug in nested mapping annotations to properly report the full nested indexed schema path. The following now can be annotated without KeyErrors: users: - default - lock-passwd: false name: root
-rw-r--r--cloudinit/config/schema.py119
-rw-r--r--cloudinit/safeyaml.py46
-rw-r--r--cloudinit/user_data.py1
-rw-r--r--tests/unittests/config/test_schema.py119
-rw-r--r--tests/unittests/test_safeyaml.py39
5 files changed, 195 insertions, 129 deletions
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index 88590ace..8d2e4af9 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -11,7 +11,6 @@ import textwrap
from collections import defaultdict
from collections.abc import Iterable
from copy import deepcopy
-from functools import partial
from itertools import chain
from typing import TYPE_CHECKING, List, NamedTuple, Optional, Type, Union, cast
@@ -29,7 +28,6 @@ except ImportError:
ValidationError = Exception # type: ignore
-error = partial(error, sys_exit=True)
LOG = logging.getLogger(__name__)
VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json"
@@ -424,10 +422,14 @@ def validate_cloudconfig_schema(
errors: SchemaProblems = []
deprecations: SchemaProblems = []
- for error in sorted(validator.iter_errors(config), key=lambda e: e.path):
- path = ".".join([str(p) for p in error.path])
- problem = (SchemaProblem(path, error.message),)
- if isinstance(error, SchemaDeprecationError): # pylint: disable=W1116
+ for schema_error in sorted(
+ validator.iter_errors(config), key=lambda e: e.path
+ ):
+ path = ".".join([str(p) for p in schema_error.path])
+ problem = (SchemaProblem(path, schema_error.message),)
+ if isinstance(
+ schema_error, SchemaDeprecationError
+ ): # pylint: disable=W1116
deprecations += problem
else:
errors += problem
@@ -610,23 +612,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
@raises SchemaValidationError containing any of schema_errors encountered.
@raises RuntimeError when config_path does not exist.
"""
- if config_path is None:
- # Use system's raw userdata path
- if os.getuid() != 0:
- raise RuntimeError(
- "Unable to read system userdata as non-root user."
- " Try using sudo"
- )
- init = Init(ds_deps=[])
- init.fetch(existing="trust")
- init.consume_data()
- content = load_file(init.paths.get_ipath("cloud_config"), decode=False)
- else:
- if not os.path.exists(config_path):
- raise RuntimeError(
- "Configfile {0} does not exist".format(config_path)
- )
- content = load_file(config_path, decode=False)
+ content = load_file(config_path, decode=False)
if not content.startswith(CLOUD_CONFIG_HEADER):
errors = [
SchemaProblem(
@@ -693,7 +679,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
schema_deprecations=e.schema_deprecations,
)
)
- else:
+ elif e.schema_deprecations:
message = _format_schema_problems(
e.schema_deprecations,
prefix="Cloud config schema deprecations: ",
@@ -1063,7 +1049,8 @@ def load_doc(requested_modules: list) -> str:
"Invalid --docs value {}. Must be one of: {}".format(
list(invalid_docs),
", ".join(all_modules),
- )
+ ),
+ sys_exit=True,
)
for mod_name in all_modules:
if "all" in requested_modules or mod_name in requested_modules:
@@ -1164,28 +1151,80 @@ def handle_schema_args(name, args):
"""Handle provided schema args and perform the appropriate actions."""
exclusive_args = [args.config_file, args.docs, args.system]
if len([arg for arg in exclusive_args if arg]) != 1:
- error("Expected one of --config-file, --system or --docs arguments")
+ error(
+ "Expected one of --config-file, --system or --docs arguments",
+ sys_exit=True,
+ )
if args.annotate and args.docs:
- error("Invalid flag combination. Cannot use --annotate with --docs")
+ error(
+ "Invalid flag combination. Cannot use --annotate with --docs",
+ sys_exit=True,
+ )
full_schema = get_schema()
- if args.config_file or args.system:
- try:
- validate_cloudconfig_file(
- args.config_file, full_schema, args.annotate
+ if args.docs:
+ print(load_doc(args.docs))
+ return
+ if args.config_file:
+ config_files = (("user-data", args.config_file),)
+ elif args.system:
+ if os.getuid() != 0:
+ error(
+ "Unable to read system userdata or vendordata as non-root"
+ " user. Try using sudo.",
+ sys_exit=True,
)
+ init = Init(ds_deps=[])
+ init.fetch(existing="trust")
+ userdata_file = init.paths.get_ipath("cloud_config")
+ config_files = (("user-data", userdata_file),)
+ vendor_config_files = (
+ ("vendor-data", init.paths.get_ipath("vendor_cloud_config")),
+ ("vendor2-data", init.paths.get_ipath("vendor2_cloud_config")),
+ )
+ for cfg_type, vendor_file in vendor_config_files:
+ if os.path.exists(vendor_file):
+ config_files += ((cfg_type, vendor_file),)
+ if not os.path.exists(config_files[0][1]):
+ error(
+ f"Config file {config_files[0][1]} does not exist",
+ fmt="Error: {}",
+ sys_exit=True,
+ )
+
+ nested_output_prefix = ""
+ multi_config_output = bool(len(config_files) > 1)
+ if multi_config_output:
+ print(
+ "Found cloud-config data types: %s"
+ % ", ".join(cfg_type for cfg_type, _ in config_files)
+ )
+ nested_output_prefix = " "
+
+ error_types = []
+ for idx, (cfg_type, cfg_file) in enumerate(config_files, 1):
+ if multi_config_output:
+ print(f"\n{idx}. {cfg_type} at {cfg_file}:")
+ try:
+ validate_cloudconfig_file(cfg_file, full_schema, args.annotate)
except SchemaValidationError as e:
if not args.annotate:
- error(str(e))
+ print(f"{nested_output_prefix}Invalid cloud-config {cfg_file}")
+ error(
+ str(e),
+ fmt=nested_output_prefix + "Error: {}\n",
+ )
+ error_types.append(cfg_type)
except RuntimeError as e:
- error(str(e))
+ print(f"{nested_output_prefix}Invalid cloud-config {cfg_type}")
+ error(str(e), fmt=nested_output_prefix + "Error: {}\n")
+ error_types.append(cfg_type)
else:
- if args.config_file is None:
- cfg_name = "system userdata"
- else:
- cfg_name = args.config_file
- print("Valid cloud-config:", cfg_name)
- elif args.docs:
- print(load_doc(args.docs))
+ print(f"{nested_output_prefix}Valid cloud-config: {cfg_type}")
+ if error_types:
+ error(
+ ", ".join(error_type for error_type in error_types),
+ fmt="Error: Invalid cloud-config schema: {}\n",
+ )
def main():
diff --git a/cloudinit/safeyaml.py b/cloudinit/safeyaml.py
index 368ac861..4a3b6c84 100644
--- a/cloudinit/safeyaml.py
+++ b/cloudinit/safeyaml.py
@@ -25,6 +25,27 @@ class _CustomSafeLoader(yaml.SafeLoader):
return super().construct_scalar(node)
+def _fix_nested_map_index(new_key_path, marks):
+ new_marks = []
+ for mark in marks:
+ if "." not in mark.path:
+ new_marks.append(mark)
+ continue
+ path_prefix, _path_idx = mark.path.rsplit(".", 1)
+ if new_key_path not in mark.path and path_prefix in mark.path:
+ new_marks.append(
+ SchemaPathMarks(
+ # Replace only the first match of path_prefix
+ mark.path.replace(path_prefix, new_key_path, 1),
+ mark.start_mark,
+ mark.end_mark,
+ )
+ )
+ else:
+ new_marks.append(mark)
+ return new_marks
+
+
class _CustomSafeLoaderWithMarks(yaml.SafeLoader):
"""A loader which provides line and column start and end marks for YAML.
@@ -102,7 +123,30 @@ class _CustomSafeLoaderWithMarks(yaml.SafeLoader):
if line_num not in self.schemamarks_by_line:
self.schemamarks_by_line[line_num] = [marks]
else:
- self.schemamarks_by_line[line_num].append(marks)
+ if line_num == sequence_item.end_mark.line:
+ self.schemamarks_by_line[line_num].append(marks)
+ else: # Incorrect multi-line mapping or sequence object.
+ for inner_line in range(
+ line_num, sequence_item.end_mark.line
+ ):
+ if inner_line in self.schemamarks_by_line:
+ schema_marks = self.schemamarks_by_line[inner_line]
+ new_marks = _fix_nested_map_index(
+ node_key_path, schema_marks
+ )
+ if (
+ inner_line == line_num
+ and schema_marks[0].path != node_key_path
+ ):
+ new_marks.insert(
+ 0,
+ SchemaPathMarks(
+ node_key_path,
+ schema_marks[0].start_mark,
+ schema_marks[-1].end_mark,
+ ),
+ )
+ self.schemamarks_by_line[inner_line] = new_marks
return sequence
def get_single_data(self):
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index 51e4cd63..3336b23d 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -166,7 +166,6 @@ class UserDataProcessor:
# TODO(harlowja): Should this be happening, shouldn't
# the part header be modified and not the base?
_replace_header(base_msg, CONTENT_TYPE, ctype)
-
self._attach_part(append_msg, part)
def _attach_launch_index(self, msg):
diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py
index 50128f2c..a3235790 100644
--- a/tests/unittests/config/test_schema.py
+++ b/tests/unittests/config/test_schema.py
@@ -17,7 +17,6 @@ from types import ModuleType
from typing import List, Optional, Sequence, Set
import pytest
-import responses
from cloudinit import stages
from cloudinit.config.schema import (
@@ -282,7 +281,7 @@ class TestValidateCloudConfigSchema:
((None, 1), ({"properties": {"p1": {"type": "string"}}}, 0)),
)
@skipUnlessJsonSchema()
- @mock.patch("cloudinit.config.schema.get_schema")
+ @mock.patch(M_PATH + "get_schema")
def test_validateconfig_schema_use_full_schema_when_no_schema_param(
self, get_schema, schema, call_count
):
@@ -638,14 +637,6 @@ class TestValidateCloudConfigFile:
"""Tests for validate_cloudconfig_file."""
@pytest.mark.parametrize("annotate", (True, False))
- def test_validateconfig_file_error_on_absent_file(self, annotate):
- """On absent config_path, validate_cloudconfig_file errors."""
- with pytest.raises(
- RuntimeError, match="Configfile /not/here does not exist"
- ):
- validate_cloudconfig_file("/not/here", {}, annotate)
-
- @pytest.mark.parametrize("annotate", (True, False))
def test_validateconfig_file_error_on_invalid_header(
self, annotate, tmpdir
):
@@ -708,59 +699,10 @@ class TestValidateCloudConfigFile:
validate_cloudconfig_file(config_file.strpath, schema, annotate)
@skipUnlessJsonSchema()
- @responses.activate
@pytest.mark.parametrize("annotate", (True, False))
@mock.patch("cloudinit.url_helper.time.sleep")
- @mock.patch(M_PATH + "os.getuid", return_value=0)
- def test_validateconfig_file_include_validates_schema(
- self, m_getuid, m_sleep, annotate, mocker
- ):
- """validate_cloudconfig_file raises errors on invalid schema
- when user-data uses `#include`."""
- schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
- included_data = "#cloud-config\np1: -1"
- included_url = "http://asdf/user-data"
- blob = f"#include {included_url}"
- responses.add(responses.GET, included_url, included_data)
-
- ci = stages.Init()
- ci.datasource = FakeDataSource(blob)
- mocker.patch(M_PATH + "Init", return_value=ci)
-
- error_msg = (
- "Cloud config schema errors: p1: -1 is not of type 'string'"
- )
- with pytest.raises(SchemaValidationError, match=error_msg):
- validate_cloudconfig_file(None, schema, annotate)
-
- @skipUnlessJsonSchema()
- @responses.activate
- @pytest.mark.parametrize("annotate", (True, False))
- @mock.patch("cloudinit.url_helper.time.sleep")
- @mock.patch(M_PATH + "os.getuid", return_value=0)
- def test_validateconfig_file_include_success(
- self, m_getuid, m_sleep, annotate, mocker
- ):
- """validate_cloudconfig_file raises errors on invalid schema
- when user-data uses `#include`."""
- schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
- included_data = "#cloud-config\np1: asdf"
- included_url = "http://asdf/user-data"
- blob = f"#include {included_url}"
- responses.add(responses.GET, included_url, included_data)
-
- ci = stages.Init()
- ci.datasource = FakeDataSource(blob)
- mocker.patch(M_PATH + "Init", return_value=ci)
-
- validate_cloudconfig_file(None, schema, annotate)
-
- @skipUnlessJsonSchema()
- @pytest.mark.parametrize("annotate", (True, False))
- @mock.patch("cloudinit.url_helper.time.sleep")
- @mock.patch(M_PATH + "os.getuid", return_value=0)
def test_validateconfig_file_no_cloud_cfg(
- self, m_getuid, m_sleep, annotate, capsys, mocker
+ self, m_sleep, annotate, capsys, mocker
):
"""validate_cloudconfig_file does noop with empty user-data."""
schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
@@ -769,15 +711,18 @@ class TestValidateCloudConfigFile:
ci = stages.Init()
ci.datasource = FakeDataSource(blob)
mocker.patch(M_PATH + "Init", return_value=ci)
+ cloud_config_file = ci.paths.get_ipath_cur("cloud_config")
+ write_file(cloud_config_file, b"")
with pytest.raises(
SchemaValidationError,
match=re.escape(
- "Cloud config schema errors: format-l1.c1: File None needs"
- ' to begin with "#cloud-config"'
+ "Cloud config schema errors: format-l1.c1:"
+ f" File {cloud_config_file} needs to begin with"
+ ' "#cloud-config"'
),
):
- validate_cloudconfig_file(None, schema, annotate)
+ validate_cloudconfig_file(cloud_config_file, schema, annotate)
class TestSchemaDocMarkdown:
@@ -1582,7 +1527,7 @@ class TestMain:
main()
assert 1 == context_manager.value.code
_out, err = capsys.readouterr()
- assert "Error:\nConfigfile NOT_A_FILE does not exist\n" == err
+ assert "Error: Config file NOT_A_FILE does not exist\n" == err
def test_main_invalid_flag_combo(self, capsys):
"""Main exits non-zero when invalid flag combo used."""
@@ -1614,24 +1559,48 @@ class TestMain:
with mock.patch("sys.argv", myargs):
assert 0 == main(), "Expected 0 exit code"
out, _err = capsys.readouterr()
- assert "Valid cloud-config: {0}\n".format(myyaml) == out
+ assert "Valid cloud-config: user-data\n" == out
@mock.patch(M_PATH + "os.getuid", return_value=0)
- def test_main_validates_system_userdata(
- self, m_getuid, capsys, mocker, paths
+ def test_main_validates_system_userdata_and_vendordata(
+ self, _getuid, capsys, mocker, paths
):
"""When --system is provided, main validates system userdata."""
m_init = mocker.patch(M_PATH + "Init")
m_init.return_value.paths.get_ipath = paths.get_ipath_cur
cloud_config_file = paths.get_ipath_cur("cloud_config")
write_file(cloud_config_file, b"#cloud-config\nntp:")
+ vd_file = paths.get_ipath_cur("vendor_cloud_config")
+ write_file(vd_file, b"#cloud-config\nssh_import_id: [me]")
+ vd2_file = paths.get_ipath_cur("vendor2_cloud_config")
+ write_file(vd2_file, b"#cloud-config\nssh_pw_auth: true")
myargs = ["mycmd", "--system"]
with mock.patch("sys.argv", myargs):
- assert 0 == main(), "Expected 0 exit code"
+ main()
out, _err = capsys.readouterr()
- assert "Valid cloud-config: system userdata\n" == out
- @mock.patch("cloudinit.config.schema.os.getuid", return_value=1000)
+ expected = dedent(
+ """\
+ Found cloud-config data types: user-data, vendor-data, vendor2-data
+
+ 1. user-data at {ud_file}:
+ Valid cloud-config: user-data
+
+ 2. vendor-data at {vd_file}:
+ Valid cloud-config: vendor-data
+
+ 3. vendor2-data at {vd2_file}:
+ Valid cloud-config: vendor2-data
+ """
+ )
+ assert (
+ expected.format(
+ ud_file=cloud_config_file, vd_file=vd_file, vd2_file=vd2_file
+ )
+ == out
+ )
+
+ @mock.patch(M_PATH + "os.getuid", return_value=1000)
def test_main_system_userdata_requires_root(self, m_getuid, capsys, paths):
"""Non-root user can't use --system param"""
myargs = ["mycmd", "--system"]
@@ -1641,8 +1610,8 @@ class TestMain:
assert 1 == context_manager.value.code
_out, err = capsys.readouterr()
expected = (
- "Error:\nUnable to read system userdata as non-root user. "
- "Try using sudo\n"
+ "Error:\nUnable to read system userdata or vendordata as non-root"
+ " user. Try using sudo.\n"
)
assert expected == err
@@ -1794,7 +1763,7 @@ class TestHandleSchemaArgs:
# D3: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false``
- Valid cloud-config: {}
+ Valid cloud-config: user-data
""" # noqa: E501
),
),
@@ -1806,7 +1775,7 @@ class TestHandleSchemaArgs:
apt_reboot_if_required: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false``, \
apt_update: DEPRECATED: Dropped after April 2027. Use ``package_update``. Default: ``false``, \
apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Default: ``false``
- Valid cloud-config: {}
+ Valid cloud-config: user-data
""" # noqa: E501
),
),
@@ -1837,6 +1806,6 @@ apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Defa
)
handle_schema_args("unused", args)
out, err = capsys.readouterr()
- assert expected_output.format(user_data_fn) == out
+ assert expected_output.format(cfg_file=user_data_fn) == out
assert not err
assert "deprec" not in caplog.text
diff --git a/tests/unittests/test_safeyaml.py b/tests/unittests/test_safeyaml.py
index 5be09b21..8713546f 100644
--- a/tests/unittests/test_safeyaml.py
+++ b/tests/unittests/test_safeyaml.py
@@ -12,45 +12,60 @@ class TestLoadWithMarks:
"source_yaml,loaded_yaml,schemamarks",
(
# Invalid cloud-config, non-dict types don't cause an error
- (b"scalar", "scalar", {}),
- # Multiple keys account for comments and whitespace lines
- (
+ pytest.param(b"scalar", "scalar", {}, id="invalid_nondict_config"),
+ pytest.param(
b"#\na: va\n \nb: vb\n#\nc: vc",
{"a": "va", "b": "vb", "c": "vc"},
{"a": 2, "b": 4, "c": 6},
+ id="handle_whitespace_and_comments",
),
- # List items represented on correct line number
- (
+ pytest.param(
b"a:\n - a1\n\n - a2\n",
{"a": ["a1", "a2"]},
{"a": 1, "a.0": 2, "a.1": 4},
+ id="list_items",
),
- # Nested dicts represented on correct line number
- (
+ pytest.param(
b"a:\n a1:\n\n aa1: aa1v\n",
{"a": {"a1": {"aa1": "aa1v"}}},
{"a": 1, "a.a1": 2, "a.a1.aa1": 4},
+ id="nested_dicts_within_dicts",
),
- (b"[list, of, scalar]", ["list", "of", "scalar"], {}),
- (
+ pytest.param(
+ b"a:\n- a1\n\n- a2: av2\n a2b: av2b\n",
+ {"a": ["a1", {"a2": "av2", "a2b": "av2b"}]},
+ {"a": 1, "a.0": 2, "a.1": 4, "a.1.a2": 4, "a.1.a2b": 5},
+ id="nested_dicts_within_list",
+ ),
+ pytest.param(
+ b"[list, of, scalar]",
+ ["list", "of", "scalar"],
+ {},
+ id="list_of_scalar",
+ ),
+ pytest.param(
b"{a: [a1, a2], b: [b3]}",
{"a": ["a1", "a2"], "b": ["b3"]},
{"a": 1, "a.0": 1, "a.1": 1, "b": 1},
+ id="dict_of_lists_oneline",
),
- (
+ pytest.param(
b"a: [a1, a2]\nb: [b3]",
{"a": ["a1", "a2"], "b": ["b3"]},
{"a": 1, "a.0": 1, "a.1": 1, "b": 2, "b.0": 2},
+ id="dict_of_lists_multiline",
),
- (
+ pytest.param(
b"a:\n- a1\n- a2\nb: [b3]",
{"a": ["a1", "a2"], "b": ["b3"]},
{"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 4},
+ id="separate_dicts_scalar_vs_nested_list",
),
- (
+ pytest.param(
b"a:\n- a1\n- a2\nb:\n- b3",
{"a": ["a1", "a2"], "b": ["b3"]},
{"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 5},
+ id="separate_dicts_nestes_lists",
),
),
)