cli: schema also validate vendordata*.

cloud-init schema --annotate now walks any valid user-data, vendor-data and vendor2-data to report whether any of these cloud-config files provide invalid or deprecated schema values. Also, fix a bug in nested mapping annotations to properly report the full nested indexed schema path. The following now can be annotated without KeyErrors: users: - default - lock-passwd: false name: root
author: Chad Smith <chad.smith@canonical.com> 2023-02-09 15:50:33 -0700
committer: GitHub <noreply@github.com> 2023-02-09 15:50:33 -0700
commit: 55686b977570a6de69bef51c1e1a9d452333995d (patch)
tree: 0b36881db29374d18ac7728b348fbb229147455f
parent: b923a1c4fe740e9142d2c16b269d692a69919c0d (diff)
download: cloud-init-git-55686b977570a6de69bef51c1e1a9d452333995d.tar.gz
5 files changed, 195 insertions, 129 deletions
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py
index 88590ace..8d2e4af9 100644
--- a/cloudinit/config/schema.py
+++ b/cloudinit/config/schema.py
@@ -11,7 +11,6 @@ import textwrap
 from collections import defaultdict
 from collections.abc import Iterable
 from copy import deepcopy
-from functools import partial
 from itertools import chain
 from typing import TYPE_CHECKING, List, NamedTuple, Optional, Type, Union, cast
 
@@ -29,7 +28,6 @@ except ImportError:
     ValidationError = Exception  # type: ignore
 
 
-error = partial(error, sys_exit=True)
 LOG = logging.getLogger(__name__)
 
 VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json"
@@ -424,10 +422,14 @@ def validate_cloudconfig_schema(
 
     errors: SchemaProblems = []
     deprecations: SchemaProblems = []
-    for error in sorted(validator.iter_errors(config), key=lambda e: e.path):
-        path = ".".join([str(p) for p in error.path])
-        problem = (SchemaProblem(path, error.message),)
-        if isinstance(error, SchemaDeprecationError):  # pylint: disable=W1116
+    for schema_error in sorted(
+        validator.iter_errors(config), key=lambda e: e.path
+    ):
+        path = ".".join([str(p) for p in schema_error.path])
+        problem = (SchemaProblem(path, schema_error.message),)
+        if isinstance(
+            schema_error, SchemaDeprecationError
+        ):  # pylint: disable=W1116
             deprecations += problem
         else:
             errors += problem
@@ -610,23 +612,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
     @raises SchemaValidationError containing any of schema_errors encountered.
     @raises RuntimeError when config_path does not exist.
     """
-    if config_path is None:
-        # Use system's raw userdata path
-        if os.getuid() != 0:
-            raise RuntimeError(
-                "Unable to read system userdata as non-root user."
-                " Try using sudo"
-            )
-        init = Init(ds_deps=[])
-        init.fetch(existing="trust")
-        init.consume_data()
-        content = load_file(init.paths.get_ipath("cloud_config"), decode=False)
-    else:
-        if not os.path.exists(config_path):
-            raise RuntimeError(
-                "Configfile {0} does not exist".format(config_path)
-            )
-        content = load_file(config_path, decode=False)
+    content = load_file(config_path, decode=False)
     if not content.startswith(CLOUD_CONFIG_HEADER):
         errors = [
             SchemaProblem(
@@ -693,7 +679,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False):
                     schema_deprecations=e.schema_deprecations,
                 )
             )
-        else:
+        elif e.schema_deprecations:
             message = _format_schema_problems(
                 e.schema_deprecations,
                 prefix="Cloud config schema deprecations: ",
@@ -1063,7 +1049,8 @@ def load_doc(requested_modules: list) -> str:
             "Invalid --docs value {}. Must be one of: {}".format(
                 list(invalid_docs),
                 ", ".join(all_modules),
-            )
+            ),
+            sys_exit=True,
         )
     for mod_name in all_modules:
         if "all" in requested_modules or mod_name in requested_modules:
@@ -1164,28 +1151,80 @@ def handle_schema_args(name, args):
     """Handle provided schema args and perform the appropriate actions."""
     exclusive_args = [args.config_file, args.docs, args.system]
     if len([arg for arg in exclusive_args if arg]) != 1:
-        error("Expected one of --config-file, --system or --docs arguments")
+        error(
+            "Expected one of --config-file, --system or --docs arguments",
+            sys_exit=True,
+        )
     if args.annotate and args.docs:
-        error("Invalid flag combination. Cannot use --annotate with --docs")
+        error(
+            "Invalid flag combination. Cannot use --annotate with --docs",
+            sys_exit=True,
+        )
     full_schema = get_schema()
-    if args.config_file or args.system:
-        try:
-            validate_cloudconfig_file(
-                args.config_file, full_schema, args.annotate
+    if args.docs:
+        print(load_doc(args.docs))
+        return
+    if args.config_file:
+        config_files = (("user-data", args.config_file),)
+    elif args.system:
+        if os.getuid() != 0:
+            error(
+                "Unable to read system userdata or vendordata as non-root"
+                " user. Try using sudo.",
+                sys_exit=True,
             )
+        init = Init(ds_deps=[])
+        init.fetch(existing="trust")
+        userdata_file = init.paths.get_ipath("cloud_config")
+        config_files = (("user-data", userdata_file),)
+        vendor_config_files = (
+            ("vendor-data", init.paths.get_ipath("vendor_cloud_config")),
+            ("vendor2-data", init.paths.get_ipath("vendor2_cloud_config")),
+        )
+        for cfg_type, vendor_file in vendor_config_files:
+            if os.path.exists(vendor_file):
+                config_files += ((cfg_type, vendor_file),)
+    if not os.path.exists(config_files[0][1]):
+        error(
+            f"Config file {config_files[0][1]} does not exist",
+            fmt="Error: {}",
+            sys_exit=True,
+        )
+
+    nested_output_prefix = ""
+    multi_config_output = bool(len(config_files) > 1)
+    if multi_config_output:
+        print(
+            "Found cloud-config data types: %s"
+            % ", ".join(cfg_type for cfg_type, _ in config_files)
+        )
+        nested_output_prefix = "  "
+
+    error_types = []
+    for idx, (cfg_type, cfg_file) in enumerate(config_files, 1):
+        if multi_config_output:
+            print(f"\n{idx}. {cfg_type} at {cfg_file}:")
+        try:
+            validate_cloudconfig_file(cfg_file, full_schema, args.annotate)
         except SchemaValidationError as e:
             if not args.annotate:
-                error(str(e))
+                print(f"{nested_output_prefix}Invalid cloud-config {cfg_file}")
+                error(
+                    str(e),
+                    fmt=nested_output_prefix + "Error: {}\n",
+                )
+                error_types.append(cfg_type)
         except RuntimeError as e:
-            error(str(e))
+            print(f"{nested_output_prefix}Invalid cloud-config {cfg_type}")
+            error(str(e), fmt=nested_output_prefix + "Error: {}\n")
+            error_types.append(cfg_type)
         else:
-            if args.config_file is None:
-                cfg_name = "system userdata"
-            else:
-                cfg_name = args.config_file
-            print("Valid cloud-config:", cfg_name)
-    elif args.docs:
-        print(load_doc(args.docs))
+            print(f"{nested_output_prefix}Valid cloud-config: {cfg_type}")
+    if error_types:
+        error(
+            ", ".join(error_type for error_type in error_types),
+            fmt="Error: Invalid cloud-config schema: {}\n",
+        )
 
 
 def main():
diff --git a/cloudinit/safeyaml.py b/cloudinit/safeyaml.py
index 368ac861..4a3b6c84 100644
--- a/cloudinit/safeyaml.py
+++ b/cloudinit/safeyaml.py
@@ -25,6 +25,27 @@ class _CustomSafeLoader(yaml.SafeLoader):
         return super().construct_scalar(node)
 
 
+def _fix_nested_map_index(new_key_path, marks):
+    new_marks = []
+    for mark in marks:
+        if "." not in mark.path:
+            new_marks.append(mark)
+            continue
+        path_prefix, _path_idx = mark.path.rsplit(".", 1)
+        if new_key_path not in mark.path and path_prefix in mark.path:
+            new_marks.append(
+                SchemaPathMarks(
+                    # Replace only the first match of path_prefix
+                    mark.path.replace(path_prefix, new_key_path, 1),
+                    mark.start_mark,
+                    mark.end_mark,
+                )
+            )
+        else:
+            new_marks.append(mark)
+    return new_marks
+
+
 class _CustomSafeLoaderWithMarks(yaml.SafeLoader):
     """A loader which provides line and column start and end marks for YAML.
 
@@ -102,7 +123,30 @@ class _CustomSafeLoaderWithMarks(yaml.SafeLoader):
             if line_num not in self.schemamarks_by_line:
                 self.schemamarks_by_line[line_num] = [marks]
             else:
-                self.schemamarks_by_line[line_num].append(marks)
+                if line_num == sequence_item.end_mark.line:
+                    self.schemamarks_by_line[line_num].append(marks)
+                else:  # Incorrect multi-line mapping or sequence object.
+                    for inner_line in range(
+                        line_num, sequence_item.end_mark.line
+                    ):
+                        if inner_line in self.schemamarks_by_line:
+                            schema_marks = self.schemamarks_by_line[inner_line]
+                            new_marks = _fix_nested_map_index(
+                                node_key_path, schema_marks
+                            )
+                            if (
+                                inner_line == line_num
+                                and schema_marks[0].path != node_key_path
+                            ):
+                                new_marks.insert(
+                                    0,
+                                    SchemaPathMarks(
+                                        node_key_path,
+                                        schema_marks[0].start_mark,
+                                        schema_marks[-1].end_mark,
+                                    ),
+                                )
+                            self.schemamarks_by_line[inner_line] = new_marks
         return sequence
 
     def get_single_data(self):
diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py
index 51e4cd63..3336b23d 100644
--- a/cloudinit/user_data.py
+++ b/cloudinit/user_data.py
@@ -166,7 +166,6 @@ class UserDataProcessor:
             # TODO(harlowja): Should this be happening, shouldn't
             # the part header be modified and not the base?
             _replace_header(base_msg, CONTENT_TYPE, ctype)
-
             self._attach_part(append_msg, part)
 
     def _attach_launch_index(self, msg):
diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py
index 50128f2c..a3235790 100644
--- a/tests/unittests/config/test_schema.py
+++ b/tests/unittests/config/test_schema.py
@@ -17,7 +17,6 @@ from types import ModuleType
 from typing import List, Optional, Sequence, Set
 
 import pytest
-import responses
 
 from cloudinit import stages
 from cloudinit.config.schema import (
@@ -282,7 +281,7 @@ class TestValidateCloudConfigSchema:
         ((None, 1), ({"properties": {"p1": {"type": "string"}}}, 0)),
     )
     @skipUnlessJsonSchema()
-    @mock.patch("cloudinit.config.schema.get_schema")
+    @mock.patch(M_PATH + "get_schema")
     def test_validateconfig_schema_use_full_schema_when_no_schema_param(
         self, get_schema, schema, call_count
     ):
@@ -638,14 +637,6 @@ class TestValidateCloudConfigFile:
     """Tests for validate_cloudconfig_file."""
 
     @pytest.mark.parametrize("annotate", (True, False))
-    def test_validateconfig_file_error_on_absent_file(self, annotate):
-        """On absent config_path, validate_cloudconfig_file errors."""
-        with pytest.raises(
-            RuntimeError, match="Configfile /not/here does not exist"
-        ):
-            validate_cloudconfig_file("/not/here", {}, annotate)
-
-    @pytest.mark.parametrize("annotate", (True, False))
     def test_validateconfig_file_error_on_invalid_header(
         self, annotate, tmpdir
     ):
@@ -708,59 +699,10 @@ class TestValidateCloudConfigFile:
             validate_cloudconfig_file(config_file.strpath, schema, annotate)
 
     @skipUnlessJsonSchema()
-    @responses.activate
     @pytest.mark.parametrize("annotate", (True, False))
     @mock.patch("cloudinit.url_helper.time.sleep")
-    @mock.patch(M_PATH + "os.getuid", return_value=0)
-    def test_validateconfig_file_include_validates_schema(
-        self, m_getuid, m_sleep, annotate, mocker
-    ):
-        """validate_cloudconfig_file raises errors on invalid schema
-        when user-data uses `#include`."""
-        schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
-        included_data = "#cloud-config\np1: -1"
-        included_url = "http://asdf/user-data"
-        blob = f"#include {included_url}"
-        responses.add(responses.GET, included_url, included_data)
-
-        ci = stages.Init()
-        ci.datasource = FakeDataSource(blob)
-        mocker.patch(M_PATH + "Init", return_value=ci)
-
-        error_msg = (
-            "Cloud config schema errors: p1: -1 is not of type 'string'"
-        )
-        with pytest.raises(SchemaValidationError, match=error_msg):
-            validate_cloudconfig_file(None, schema, annotate)
-
-    @skipUnlessJsonSchema()
-    @responses.activate
-    @pytest.mark.parametrize("annotate", (True, False))
-    @mock.patch("cloudinit.url_helper.time.sleep")
-    @mock.patch(M_PATH + "os.getuid", return_value=0)
-    def test_validateconfig_file_include_success(
-        self, m_getuid, m_sleep, annotate, mocker
-    ):
-        """validate_cloudconfig_file raises errors on invalid schema
-        when user-data uses `#include`."""
-        schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
-        included_data = "#cloud-config\np1: asdf"
-        included_url = "http://asdf/user-data"
-        blob = f"#include {included_url}"
-        responses.add(responses.GET, included_url, included_data)
-
-        ci = stages.Init()
-        ci.datasource = FakeDataSource(blob)
-        mocker.patch(M_PATH + "Init", return_value=ci)
-
-        validate_cloudconfig_file(None, schema, annotate)
-
-    @skipUnlessJsonSchema()
-    @pytest.mark.parametrize("annotate", (True, False))
-    @mock.patch("cloudinit.url_helper.time.sleep")
-    @mock.patch(M_PATH + "os.getuid", return_value=0)
     def test_validateconfig_file_no_cloud_cfg(
-        self, m_getuid, m_sleep, annotate, capsys, mocker
+        self, m_sleep, annotate, capsys, mocker
     ):
         """validate_cloudconfig_file does noop with empty user-data."""
         schema = {"properties": {"p1": {"type": "string", "format": "string"}}}
@@ -769,15 +711,18 @@ class TestValidateCloudConfigFile:
         ci = stages.Init()
         ci.datasource = FakeDataSource(blob)
         mocker.patch(M_PATH + "Init", return_value=ci)
+        cloud_config_file = ci.paths.get_ipath_cur("cloud_config")
+        write_file(cloud_config_file, b"")
 
         with pytest.raises(
             SchemaValidationError,
             match=re.escape(
-                "Cloud config schema errors: format-l1.c1: File None needs"
-                ' to begin with "#cloud-config"'
+                "Cloud config schema errors: format-l1.c1:"
+                f" File {cloud_config_file} needs to begin with"
+                ' "#cloud-config"'
             ),
         ):
-            validate_cloudconfig_file(None, schema, annotate)
+            validate_cloudconfig_file(cloud_config_file, schema, annotate)
 
 
 class TestSchemaDocMarkdown:
@@ -1582,7 +1527,7 @@ class TestMain:
                 main()
         assert 1 == context_manager.value.code
         _out, err = capsys.readouterr()
-        assert "Error:\nConfigfile NOT_A_FILE does not exist\n" == err
+        assert "Error: Config file NOT_A_FILE does not exist\n" == err
 
     def test_main_invalid_flag_combo(self, capsys):
         """Main exits non-zero when invalid flag combo used."""
@@ -1614,24 +1559,48 @@ class TestMain:
         with mock.patch("sys.argv", myargs):
             assert 0 == main(), "Expected 0 exit code"
         out, _err = capsys.readouterr()
-        assert "Valid cloud-config: {0}\n".format(myyaml) == out
+        assert "Valid cloud-config: user-data\n" == out
 
     @mock.patch(M_PATH + "os.getuid", return_value=0)
-    def test_main_validates_system_userdata(
-        self, m_getuid, capsys, mocker, paths
+    def test_main_validates_system_userdata_and_vendordata(
+        self, _getuid, capsys, mocker, paths
     ):
         """When --system is provided, main validates system userdata."""
         m_init = mocker.patch(M_PATH + "Init")
         m_init.return_value.paths.get_ipath = paths.get_ipath_cur
         cloud_config_file = paths.get_ipath_cur("cloud_config")
         write_file(cloud_config_file, b"#cloud-config\nntp:")
+        vd_file = paths.get_ipath_cur("vendor_cloud_config")
+        write_file(vd_file, b"#cloud-config\nssh_import_id: [me]")
+        vd2_file = paths.get_ipath_cur("vendor2_cloud_config")
+        write_file(vd2_file, b"#cloud-config\nssh_pw_auth: true")
         myargs = ["mycmd", "--system"]
         with mock.patch("sys.argv", myargs):
-            assert 0 == main(), "Expected 0 exit code"
+            main()
         out, _err = capsys.readouterr()
-        assert "Valid cloud-config: system userdata\n" == out
 
-    @mock.patch("cloudinit.config.schema.os.getuid", return_value=1000)
+        expected = dedent(
+            """\
+        Found cloud-config data types: user-data, vendor-data, vendor2-data
+
+        1. user-data at {ud_file}:
+          Valid cloud-config: user-data
+
+        2. vendor-data at {vd_file}:
+          Valid cloud-config: vendor-data
+
+        3. vendor2-data at {vd2_file}:
+          Valid cloud-config: vendor2-data
+        """
+        )
+        assert (
+            expected.format(
+                ud_file=cloud_config_file, vd_file=vd_file, vd2_file=vd2_file
+            )
+            == out
+        )
+
+    @mock.patch(M_PATH + "os.getuid", return_value=1000)
     def test_main_system_userdata_requires_root(self, m_getuid, capsys, paths):
         """Non-root user can't use --system param"""
         myargs = ["mycmd", "--system"]
@@ -1641,8 +1610,8 @@ class TestMain:
         assert 1 == context_manager.value.code
         _out, err = capsys.readouterr()
         expected = (
-            "Error:\nUnable to read system userdata as non-root user. "
-            "Try using sudo\n"
+            "Error:\nUnable to read system userdata or vendordata as non-root"
+            " user. Try using sudo.\n"
         )
         assert expected == err
 
@@ -1794,7 +1763,7 @@ class TestHandleSchemaArgs:
                     # D3: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false``
 
 
-                    Valid cloud-config: {}
+                    Valid cloud-config: user-data
                     """  # noqa: E501
                 ),
             ),
@@ -1806,7 +1775,7 @@ class TestHandleSchemaArgs:
 apt_reboot_if_required: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false``, \
 apt_update: DEPRECATED: Dropped after April 2027. Use ``package_update``. Default: ``false``, \
 apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Default: ``false``
-                    Valid cloud-config: {}
+                    Valid cloud-config: user-data
                     """  # noqa: E501
                 ),
             ),
@@ -1837,6 +1806,6 @@ apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Defa
         )
         handle_schema_args("unused", args)
         out, err = capsys.readouterr()
-        assert expected_output.format(user_data_fn) == out
+        assert expected_output.format(cfg_file=user_data_fn) == out
         assert not err
         assert "deprec" not in caplog.text
diff --git a/tests/unittests/test_safeyaml.py b/tests/unittests/test_safeyaml.py
index 5be09b21..8713546f 100644
--- a/tests/unittests/test_safeyaml.py
+++ b/tests/unittests/test_safeyaml.py
@@ -12,45 +12,60 @@ class TestLoadWithMarks:
         "source_yaml,loaded_yaml,schemamarks",
         (
             # Invalid cloud-config, non-dict types don't cause an error
-            (b"scalar", "scalar", {}),
-            # Multiple keys account for comments and whitespace lines
-            (
+            pytest.param(b"scalar", "scalar", {}, id="invalid_nondict_config"),
+            pytest.param(
                 b"#\na: va\n  \nb: vb\n#\nc: vc",
                 {"a": "va", "b": "vb", "c": "vc"},
                 {"a": 2, "b": 4, "c": 6},
+                id="handle_whitespace_and_comments",
             ),
-            # List items represented on correct line number
-            (
+            pytest.param(
                 b"a:\n - a1\n\n - a2\n",
                 {"a": ["a1", "a2"]},
                 {"a": 1, "a.0": 2, "a.1": 4},
+                id="list_items",
             ),
-            # Nested dicts represented on correct line number
-            (
+            pytest.param(
                 b"a:\n a1:\n\n  aa1: aa1v\n",
                 {"a": {"a1": {"aa1": "aa1v"}}},
                 {"a": 1, "a.a1": 2, "a.a1.aa1": 4},
+                id="nested_dicts_within_dicts",
             ),
-            (b"[list, of, scalar]", ["list", "of", "scalar"], {}),
-            (
+            pytest.param(
+                b"a:\n- a1\n\n- a2: av2\n  a2b: av2b\n",
+                {"a": ["a1", {"a2": "av2", "a2b": "av2b"}]},
+                {"a": 1, "a.0": 2, "a.1": 4, "a.1.a2": 4, "a.1.a2b": 5},
+                id="nested_dicts_within_list",
+            ),
+            pytest.param(
+                b"[list, of, scalar]",
+                ["list", "of", "scalar"],
+                {},
+                id="list_of_scalar",
+            ),
+            pytest.param(
                 b"{a: [a1, a2], b: [b3]}",
                 {"a": ["a1", "a2"], "b": ["b3"]},
                 {"a": 1, "a.0": 1, "a.1": 1, "b": 1},
+                id="dict_of_lists_oneline",
             ),
-            (
+            pytest.param(
                 b"a: [a1, a2]\nb: [b3]",
                 {"a": ["a1", "a2"], "b": ["b3"]},
                 {"a": 1, "a.0": 1, "a.1": 1, "b": 2, "b.0": 2},
+                id="dict_of_lists_multiline",
             ),
-            (
+            pytest.param(
                 b"a:\n- a1\n- a2\nb: [b3]",
                 {"a": ["a1", "a2"], "b": ["b3"]},
                 {"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 4},
+                id="separate_dicts_scalar_vs_nested_list",
             ),
-            (
+            pytest.param(
                 b"a:\n- a1\n- a2\nb:\n- b3",
                 {"a": ["a1", "a2"], "b": ["b3"]},
                 {"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 5},
+                id="separate_dicts_nestes_lists",
             ),
         ),
     )
author	Chad Smith <chad.smith@canonical.com>	2023-02-09 15:50:33 -0700
committer	GitHub <noreply@github.com>	2023-02-09 15:50:33 -0700
commit	55686b977570a6de69bef51c1e1a9d452333995d (patch)
tree	0b36881db29374d18ac7728b348fbb229147455f
parent	b923a1c4fe740e9142d2c16b269d692a69919c0d (diff)
download	cloud-init-git-55686b977570a6de69bef51c1e1a9d452333995d.tar.gz