diff options
author | Chad Smith <chad.smith@canonical.com> | 2023-02-09 15:50:33 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-02-09 15:50:33 -0700 |
commit | 55686b977570a6de69bef51c1e1a9d452333995d (patch) | |
tree | 0b36881db29374d18ac7728b348fbb229147455f | |
parent | b923a1c4fe740e9142d2c16b269d692a69919c0d (diff) | |
download | cloud-init-git-55686b977570a6de69bef51c1e1a9d452333995d.tar.gz |
cli: schema also validate vendordata*.
cloud-init schema --annotate now walks any valid
user-data, vendor-data and vendor2-data to report
whether any of these cloud-config files provide invalid
or deprecated schema values.
Also, fix a bug in nested mapping annotations to
properly report the full nested indexed schema path.
The following now can be annotated without KeyErrors:
users:
- default
- lock-passwd: false
name: root
-rw-r--r-- | cloudinit/config/schema.py | 119 | ||||
-rw-r--r-- | cloudinit/safeyaml.py | 46 | ||||
-rw-r--r-- | cloudinit/user_data.py | 1 | ||||
-rw-r--r-- | tests/unittests/config/test_schema.py | 119 | ||||
-rw-r--r-- | tests/unittests/test_safeyaml.py | 39 |
5 files changed, 195 insertions, 129 deletions
diff --git a/cloudinit/config/schema.py b/cloudinit/config/schema.py index 88590ace..8d2e4af9 100644 --- a/cloudinit/config/schema.py +++ b/cloudinit/config/schema.py @@ -11,7 +11,6 @@ import textwrap from collections import defaultdict from collections.abc import Iterable from copy import deepcopy -from functools import partial from itertools import chain from typing import TYPE_CHECKING, List, NamedTuple, Optional, Type, Union, cast @@ -29,7 +28,6 @@ except ImportError: ValidationError = Exception # type: ignore -error = partial(error, sys_exit=True) LOG = logging.getLogger(__name__) VERSIONED_USERDATA_SCHEMA_FILE = "versions.schema.cloud-config.json" @@ -424,10 +422,14 @@ def validate_cloudconfig_schema( errors: SchemaProblems = [] deprecations: SchemaProblems = [] - for error in sorted(validator.iter_errors(config), key=lambda e: e.path): - path = ".".join([str(p) for p in error.path]) - problem = (SchemaProblem(path, error.message),) - if isinstance(error, SchemaDeprecationError): # pylint: disable=W1116 + for schema_error in sorted( + validator.iter_errors(config), key=lambda e: e.path + ): + path = ".".join([str(p) for p in schema_error.path]) + problem = (SchemaProblem(path, schema_error.message),) + if isinstance( + schema_error, SchemaDeprecationError + ): # pylint: disable=W1116 deprecations += problem else: errors += problem @@ -610,23 +612,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False): @raises SchemaValidationError containing any of schema_errors encountered. @raises RuntimeError when config_path does not exist. """ - if config_path is None: - # Use system's raw userdata path - if os.getuid() != 0: - raise RuntimeError( - "Unable to read system userdata as non-root user." - " Try using sudo" - ) - init = Init(ds_deps=[]) - init.fetch(existing="trust") - init.consume_data() - content = load_file(init.paths.get_ipath("cloud_config"), decode=False) - else: - if not os.path.exists(config_path): - raise RuntimeError( - "Configfile {0} does not exist".format(config_path) - ) - content = load_file(config_path, decode=False) + content = load_file(config_path, decode=False) if not content.startswith(CLOUD_CONFIG_HEADER): errors = [ SchemaProblem( @@ -693,7 +679,7 @@ def validate_cloudconfig_file(config_path, schema, annotate=False): schema_deprecations=e.schema_deprecations, ) ) - else: + elif e.schema_deprecations: message = _format_schema_problems( e.schema_deprecations, prefix="Cloud config schema deprecations: ", @@ -1063,7 +1049,8 @@ def load_doc(requested_modules: list) -> str: "Invalid --docs value {}. Must be one of: {}".format( list(invalid_docs), ", ".join(all_modules), - ) + ), + sys_exit=True, ) for mod_name in all_modules: if "all" in requested_modules or mod_name in requested_modules: @@ -1164,28 +1151,80 @@ def handle_schema_args(name, args): """Handle provided schema args and perform the appropriate actions.""" exclusive_args = [args.config_file, args.docs, args.system] if len([arg for arg in exclusive_args if arg]) != 1: - error("Expected one of --config-file, --system or --docs arguments") + error( + "Expected one of --config-file, --system or --docs arguments", + sys_exit=True, + ) if args.annotate and args.docs: - error("Invalid flag combination. Cannot use --annotate with --docs") + error( + "Invalid flag combination. Cannot use --annotate with --docs", + sys_exit=True, + ) full_schema = get_schema() - if args.config_file or args.system: - try: - validate_cloudconfig_file( - args.config_file, full_schema, args.annotate + if args.docs: + print(load_doc(args.docs)) + return + if args.config_file: + config_files = (("user-data", args.config_file),) + elif args.system: + if os.getuid() != 0: + error( + "Unable to read system userdata or vendordata as non-root" + " user. Try using sudo.", + sys_exit=True, ) + init = Init(ds_deps=[]) + init.fetch(existing="trust") + userdata_file = init.paths.get_ipath("cloud_config") + config_files = (("user-data", userdata_file),) + vendor_config_files = ( + ("vendor-data", init.paths.get_ipath("vendor_cloud_config")), + ("vendor2-data", init.paths.get_ipath("vendor2_cloud_config")), + ) + for cfg_type, vendor_file in vendor_config_files: + if os.path.exists(vendor_file): + config_files += ((cfg_type, vendor_file),) + if not os.path.exists(config_files[0][1]): + error( + f"Config file {config_files[0][1]} does not exist", + fmt="Error: {}", + sys_exit=True, + ) + + nested_output_prefix = "" + multi_config_output = bool(len(config_files) > 1) + if multi_config_output: + print( + "Found cloud-config data types: %s" + % ", ".join(cfg_type for cfg_type, _ in config_files) + ) + nested_output_prefix = " " + + error_types = [] + for idx, (cfg_type, cfg_file) in enumerate(config_files, 1): + if multi_config_output: + print(f"\n{idx}. {cfg_type} at {cfg_file}:") + try: + validate_cloudconfig_file(cfg_file, full_schema, args.annotate) except SchemaValidationError as e: if not args.annotate: - error(str(e)) + print(f"{nested_output_prefix}Invalid cloud-config {cfg_file}") + error( + str(e), + fmt=nested_output_prefix + "Error: {}\n", + ) + error_types.append(cfg_type) except RuntimeError as e: - error(str(e)) + print(f"{nested_output_prefix}Invalid cloud-config {cfg_type}") + error(str(e), fmt=nested_output_prefix + "Error: {}\n") + error_types.append(cfg_type) else: - if args.config_file is None: - cfg_name = "system userdata" - else: - cfg_name = args.config_file - print("Valid cloud-config:", cfg_name) - elif args.docs: - print(load_doc(args.docs)) + print(f"{nested_output_prefix}Valid cloud-config: {cfg_type}") + if error_types: + error( + ", ".join(error_type for error_type in error_types), + fmt="Error: Invalid cloud-config schema: {}\n", + ) def main(): diff --git a/cloudinit/safeyaml.py b/cloudinit/safeyaml.py index 368ac861..4a3b6c84 100644 --- a/cloudinit/safeyaml.py +++ b/cloudinit/safeyaml.py @@ -25,6 +25,27 @@ class _CustomSafeLoader(yaml.SafeLoader): return super().construct_scalar(node) +def _fix_nested_map_index(new_key_path, marks): + new_marks = [] + for mark in marks: + if "." not in mark.path: + new_marks.append(mark) + continue + path_prefix, _path_idx = mark.path.rsplit(".", 1) + if new_key_path not in mark.path and path_prefix in mark.path: + new_marks.append( + SchemaPathMarks( + # Replace only the first match of path_prefix + mark.path.replace(path_prefix, new_key_path, 1), + mark.start_mark, + mark.end_mark, + ) + ) + else: + new_marks.append(mark) + return new_marks + + class _CustomSafeLoaderWithMarks(yaml.SafeLoader): """A loader which provides line and column start and end marks for YAML. @@ -102,7 +123,30 @@ class _CustomSafeLoaderWithMarks(yaml.SafeLoader): if line_num not in self.schemamarks_by_line: self.schemamarks_by_line[line_num] = [marks] else: - self.schemamarks_by_line[line_num].append(marks) + if line_num == sequence_item.end_mark.line: + self.schemamarks_by_line[line_num].append(marks) + else: # Incorrect multi-line mapping or sequence object. + for inner_line in range( + line_num, sequence_item.end_mark.line + ): + if inner_line in self.schemamarks_by_line: + schema_marks = self.schemamarks_by_line[inner_line] + new_marks = _fix_nested_map_index( + node_key_path, schema_marks + ) + if ( + inner_line == line_num + and schema_marks[0].path != node_key_path + ): + new_marks.insert( + 0, + SchemaPathMarks( + node_key_path, + schema_marks[0].start_mark, + schema_marks[-1].end_mark, + ), + ) + self.schemamarks_by_line[inner_line] = new_marks return sequence def get_single_data(self): diff --git a/cloudinit/user_data.py b/cloudinit/user_data.py index 51e4cd63..3336b23d 100644 --- a/cloudinit/user_data.py +++ b/cloudinit/user_data.py @@ -166,7 +166,6 @@ class UserDataProcessor: # TODO(harlowja): Should this be happening, shouldn't # the part header be modified and not the base? _replace_header(base_msg, CONTENT_TYPE, ctype) - self._attach_part(append_msg, part) def _attach_launch_index(self, msg): diff --git a/tests/unittests/config/test_schema.py b/tests/unittests/config/test_schema.py index 50128f2c..a3235790 100644 --- a/tests/unittests/config/test_schema.py +++ b/tests/unittests/config/test_schema.py @@ -17,7 +17,6 @@ from types import ModuleType from typing import List, Optional, Sequence, Set import pytest -import responses from cloudinit import stages from cloudinit.config.schema import ( @@ -282,7 +281,7 @@ class TestValidateCloudConfigSchema: ((None, 1), ({"properties": {"p1": {"type": "string"}}}, 0)), ) @skipUnlessJsonSchema() - @mock.patch("cloudinit.config.schema.get_schema") + @mock.patch(M_PATH + "get_schema") def test_validateconfig_schema_use_full_schema_when_no_schema_param( self, get_schema, schema, call_count ): @@ -638,14 +637,6 @@ class TestValidateCloudConfigFile: """Tests for validate_cloudconfig_file.""" @pytest.mark.parametrize("annotate", (True, False)) - def test_validateconfig_file_error_on_absent_file(self, annotate): - """On absent config_path, validate_cloudconfig_file errors.""" - with pytest.raises( - RuntimeError, match="Configfile /not/here does not exist" - ): - validate_cloudconfig_file("/not/here", {}, annotate) - - @pytest.mark.parametrize("annotate", (True, False)) def test_validateconfig_file_error_on_invalid_header( self, annotate, tmpdir ): @@ -708,59 +699,10 @@ class TestValidateCloudConfigFile: validate_cloudconfig_file(config_file.strpath, schema, annotate) @skipUnlessJsonSchema() - @responses.activate @pytest.mark.parametrize("annotate", (True, False)) @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(M_PATH + "os.getuid", return_value=0) - def test_validateconfig_file_include_validates_schema( - self, m_getuid, m_sleep, annotate, mocker - ): - """validate_cloudconfig_file raises errors on invalid schema - when user-data uses `#include`.""" - schema = {"properties": {"p1": {"type": "string", "format": "string"}}} - included_data = "#cloud-config\np1: -1" - included_url = "http://asdf/user-data" - blob = f"#include {included_url}" - responses.add(responses.GET, included_url, included_data) - - ci = stages.Init() - ci.datasource = FakeDataSource(blob) - mocker.patch(M_PATH + "Init", return_value=ci) - - error_msg = ( - "Cloud config schema errors: p1: -1 is not of type 'string'" - ) - with pytest.raises(SchemaValidationError, match=error_msg): - validate_cloudconfig_file(None, schema, annotate) - - @skipUnlessJsonSchema() - @responses.activate - @pytest.mark.parametrize("annotate", (True, False)) - @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(M_PATH + "os.getuid", return_value=0) - def test_validateconfig_file_include_success( - self, m_getuid, m_sleep, annotate, mocker - ): - """validate_cloudconfig_file raises errors on invalid schema - when user-data uses `#include`.""" - schema = {"properties": {"p1": {"type": "string", "format": "string"}}} - included_data = "#cloud-config\np1: asdf" - included_url = "http://asdf/user-data" - blob = f"#include {included_url}" - responses.add(responses.GET, included_url, included_data) - - ci = stages.Init() - ci.datasource = FakeDataSource(blob) - mocker.patch(M_PATH + "Init", return_value=ci) - - validate_cloudconfig_file(None, schema, annotate) - - @skipUnlessJsonSchema() - @pytest.mark.parametrize("annotate", (True, False)) - @mock.patch("cloudinit.url_helper.time.sleep") - @mock.patch(M_PATH + "os.getuid", return_value=0) def test_validateconfig_file_no_cloud_cfg( - self, m_getuid, m_sleep, annotate, capsys, mocker + self, m_sleep, annotate, capsys, mocker ): """validate_cloudconfig_file does noop with empty user-data.""" schema = {"properties": {"p1": {"type": "string", "format": "string"}}} @@ -769,15 +711,18 @@ class TestValidateCloudConfigFile: ci = stages.Init() ci.datasource = FakeDataSource(blob) mocker.patch(M_PATH + "Init", return_value=ci) + cloud_config_file = ci.paths.get_ipath_cur("cloud_config") + write_file(cloud_config_file, b"") with pytest.raises( SchemaValidationError, match=re.escape( - "Cloud config schema errors: format-l1.c1: File None needs" - ' to begin with "#cloud-config"' + "Cloud config schema errors: format-l1.c1:" + f" File {cloud_config_file} needs to begin with" + ' "#cloud-config"' ), ): - validate_cloudconfig_file(None, schema, annotate) + validate_cloudconfig_file(cloud_config_file, schema, annotate) class TestSchemaDocMarkdown: @@ -1582,7 +1527,7 @@ class TestMain: main() assert 1 == context_manager.value.code _out, err = capsys.readouterr() - assert "Error:\nConfigfile NOT_A_FILE does not exist\n" == err + assert "Error: Config file NOT_A_FILE does not exist\n" == err def test_main_invalid_flag_combo(self, capsys): """Main exits non-zero when invalid flag combo used.""" @@ -1614,24 +1559,48 @@ class TestMain: with mock.patch("sys.argv", myargs): assert 0 == main(), "Expected 0 exit code" out, _err = capsys.readouterr() - assert "Valid cloud-config: {0}\n".format(myyaml) == out + assert "Valid cloud-config: user-data\n" == out @mock.patch(M_PATH + "os.getuid", return_value=0) - def test_main_validates_system_userdata( - self, m_getuid, capsys, mocker, paths + def test_main_validates_system_userdata_and_vendordata( + self, _getuid, capsys, mocker, paths ): """When --system is provided, main validates system userdata.""" m_init = mocker.patch(M_PATH + "Init") m_init.return_value.paths.get_ipath = paths.get_ipath_cur cloud_config_file = paths.get_ipath_cur("cloud_config") write_file(cloud_config_file, b"#cloud-config\nntp:") + vd_file = paths.get_ipath_cur("vendor_cloud_config") + write_file(vd_file, b"#cloud-config\nssh_import_id: [me]") + vd2_file = paths.get_ipath_cur("vendor2_cloud_config") + write_file(vd2_file, b"#cloud-config\nssh_pw_auth: true") myargs = ["mycmd", "--system"] with mock.patch("sys.argv", myargs): - assert 0 == main(), "Expected 0 exit code" + main() out, _err = capsys.readouterr() - assert "Valid cloud-config: system userdata\n" == out - @mock.patch("cloudinit.config.schema.os.getuid", return_value=1000) + expected = dedent( + """\ + Found cloud-config data types: user-data, vendor-data, vendor2-data + + 1. user-data at {ud_file}: + Valid cloud-config: user-data + + 2. vendor-data at {vd_file}: + Valid cloud-config: vendor-data + + 3. vendor2-data at {vd2_file}: + Valid cloud-config: vendor2-data + """ + ) + assert ( + expected.format( + ud_file=cloud_config_file, vd_file=vd_file, vd2_file=vd2_file + ) + == out + ) + + @mock.patch(M_PATH + "os.getuid", return_value=1000) def test_main_system_userdata_requires_root(self, m_getuid, capsys, paths): """Non-root user can't use --system param""" myargs = ["mycmd", "--system"] @@ -1641,8 +1610,8 @@ class TestMain: assert 1 == context_manager.value.code _out, err = capsys.readouterr() expected = ( - "Error:\nUnable to read system userdata as non-root user. " - "Try using sudo\n" + "Error:\nUnable to read system userdata or vendordata as non-root" + " user. Try using sudo.\n" ) assert expected == err @@ -1794,7 +1763,7 @@ class TestHandleSchemaArgs: # D3: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false`` - Valid cloud-config: {} + Valid cloud-config: user-data """ # noqa: E501 ), ), @@ -1806,7 +1775,7 @@ class TestHandleSchemaArgs: apt_reboot_if_required: DEPRECATED: Dropped after April 2027. Use ``package_reboot_if_required``. Default: ``false``, \ apt_update: DEPRECATED: Dropped after April 2027. Use ``package_update``. Default: ``false``, \ apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Default: ``false`` - Valid cloud-config: {} + Valid cloud-config: user-data """ # noqa: E501 ), ), @@ -1837,6 +1806,6 @@ apt_upgrade: DEPRECATED: Dropped after April 2027. Use ``package_upgrade``. Defa ) handle_schema_args("unused", args) out, err = capsys.readouterr() - assert expected_output.format(user_data_fn) == out + assert expected_output.format(cfg_file=user_data_fn) == out assert not err assert "deprec" not in caplog.text diff --git a/tests/unittests/test_safeyaml.py b/tests/unittests/test_safeyaml.py index 5be09b21..8713546f 100644 --- a/tests/unittests/test_safeyaml.py +++ b/tests/unittests/test_safeyaml.py @@ -12,45 +12,60 @@ class TestLoadWithMarks: "source_yaml,loaded_yaml,schemamarks", ( # Invalid cloud-config, non-dict types don't cause an error - (b"scalar", "scalar", {}), - # Multiple keys account for comments and whitespace lines - ( + pytest.param(b"scalar", "scalar", {}, id="invalid_nondict_config"), + pytest.param( b"#\na: va\n \nb: vb\n#\nc: vc", {"a": "va", "b": "vb", "c": "vc"}, {"a": 2, "b": 4, "c": 6}, + id="handle_whitespace_and_comments", ), - # List items represented on correct line number - ( + pytest.param( b"a:\n - a1\n\n - a2\n", {"a": ["a1", "a2"]}, {"a": 1, "a.0": 2, "a.1": 4}, + id="list_items", ), - # Nested dicts represented on correct line number - ( + pytest.param( b"a:\n a1:\n\n aa1: aa1v\n", {"a": {"a1": {"aa1": "aa1v"}}}, {"a": 1, "a.a1": 2, "a.a1.aa1": 4}, + id="nested_dicts_within_dicts", ), - (b"[list, of, scalar]", ["list", "of", "scalar"], {}), - ( + pytest.param( + b"a:\n- a1\n\n- a2: av2\n a2b: av2b\n", + {"a": ["a1", {"a2": "av2", "a2b": "av2b"}]}, + {"a": 1, "a.0": 2, "a.1": 4, "a.1.a2": 4, "a.1.a2b": 5}, + id="nested_dicts_within_list", + ), + pytest.param( + b"[list, of, scalar]", + ["list", "of", "scalar"], + {}, + id="list_of_scalar", + ), + pytest.param( b"{a: [a1, a2], b: [b3]}", {"a": ["a1", "a2"], "b": ["b3"]}, {"a": 1, "a.0": 1, "a.1": 1, "b": 1}, + id="dict_of_lists_oneline", ), - ( + pytest.param( b"a: [a1, a2]\nb: [b3]", {"a": ["a1", "a2"], "b": ["b3"]}, {"a": 1, "a.0": 1, "a.1": 1, "b": 2, "b.0": 2}, + id="dict_of_lists_multiline", ), - ( + pytest.param( b"a:\n- a1\n- a2\nb: [b3]", {"a": ["a1", "a2"], "b": ["b3"]}, {"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 4}, + id="separate_dicts_scalar_vs_nested_list", ), - ( + pytest.param( b"a:\n- a1\n- a2\nb:\n- b3", {"a": ["a1", "a2"], "b": ["b3"]}, {"a": 1, "a.0": 2, "a.1": 3, "b": 4, "b.0": 5}, + id="separate_dicts_nestes_lists", ), ), ) |