summary | refs | log | tree | commit | diff
path: root/cloudinit/sources/DataSourceAzure.py
diff options
context:
space:
mode:
Diffstat (limited to 'cloudinit/sources/DataSourceAzure.py')
-rw-r--r-- cloudinit/sources/DataSourceAzure.py | 326
1 files changed, 28 insertions, 298 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py
index 9dac4c6b..b7d3e5a3 100644
--- a/cloudinit/sources/DataSourceAzure.py
+++ b/cloudinit/sources/DataSourceAzure.py
@@ -6,7 +6,6 @@
import base64
import crypt
-import functools
import os
import os.path
import re
@@ -16,8 +15,6 @@ from pathlib import Path
from time import sleep, time
from typing import Any, Dict, List, Optional
-import requests
-
from cloudinit import dmi
from cloudinit import log as logging
from cloudinit import net, sources, ssh_util, subp, util
@@ -29,6 +26,7 @@ from cloudinit.net.dhcp import (
)
from cloudinit.net.ephemeral import EphemeralDHCPv4
from cloudinit.reporting import events
+from cloudinit.sources.azure import imds
from cloudinit.sources.helpers import netlink
from cloudinit.sources.helpers.azure import (
DEFAULT_WIRESERVER_ENDPOINT,
@@ -49,7 +47,7 @@ from cloudinit.sources.helpers.azure import (
report_diagnostic_event,
report_failure_to_fabric,
)
-from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc
+from cloudinit.url_helper import UrlError
LOG = logging.getLogger(__name__)
@@ -63,34 +61,6 @@ DEFAULT_FS = "ext4"
AGENT_SEED_DIR = "/var/lib/waagent"
DEFAULT_PROVISIONING_ISO_DEV = "/dev/sr0"
-# In the event where the IMDS primary server is not
-# available, it takes 1s to fallback to the secondary one
-IMDS_TIMEOUT_IN_SECONDS = 2
-IMDS_URL = "http://169.254.169.254/metadata"
-IMDS_VER_MIN = "2019-06-01"
-IMDS_VER_WANT = "2021-08-01"
-IMDS_EXTENDED_VER_MIN = "2021-03-01"
-IMDS_RETRY_CODES = (
- 404, # not found (yet)
- 410, # gone / unavailable (yet)
- 429, # rate-limited/throttled
- 500, # server error
-)
-imds_readurl_exception_callback = functools.partial(
- retry_on_url_exc,
- retry_codes=IMDS_RETRY_CODES,
- retry_instances=(
- requests.ConnectionError,
- requests.Timeout,
- ),
-)
-
-
-class MetadataType(Enum):
- ALL = "{}/instance".format(IMDS_URL)
- NETWORK = "{}/instance/network".format(IMDS_URL)
- REPROVISION_DATA = "{}/reprovisiondata".format(IMDS_URL)
-
class PPSType(Enum):
NONE = "None"
@@ -593,10 +563,9 @@ class DataSourceAzure(sources.DataSource):
except NoDHCPLeaseError:
pass
+ imds_md = {}
if self._is_ephemeral_networking_up():
- imds_md = self.get_imds_data_with_api_fallback(retries=10)
- else:
- imds_md = {}
+ imds_md = self.get_metadata_from_imds()
if not imds_md and ovf_source is None:
msg = "No OVF or IMDS available"
@@ -619,7 +588,7 @@ class DataSourceAzure(sources.DataSource):
md, userdata_raw, cfg, files = self._reprovision()
# fetch metadata again as it has changed after reprovisioning
- imds_md = self.get_imds_data_with_api_fallback(retries=10)
+ imds_md = self.get_metadata_from_imds()
# Report errors if IMDS network configuration is missing data.
self.validate_imds_network_metadata(imds_md=imds_md)
@@ -710,6 +679,17 @@ class DataSourceAzure(sources.DataSource):
return crawled_data
+ @azure_ds_telemetry_reporter
+ def get_metadata_from_imds(self) -> Dict:
+ try:
+ return imds.fetch_metadata_with_api_fallback()
+ except (UrlError, ValueError) as error:
+ report_diagnostic_event(
+ "Ignoring IMDS metadata due to: %s" % error,
+ logger_func=LOG.warning,
+ )
+ return {}
+
def clear_cached_attrs(self, attr_defaults=()):
"""Reset any cached class attributes to defaults."""
super(DataSourceAzure, self).clear_cached_attrs(attr_defaults)
@@ -795,54 +775,6 @@ class DataSourceAzure(sources.DataSource):
)
return True
- @azure_ds_telemetry_reporter
- def get_imds_data_with_api_fallback(
- self,
- *,
- retries: int,
- md_type: MetadataType = MetadataType.ALL,
- exc_cb=imds_readurl_exception_callback,
- infinite: bool = False,
- ) -> dict:
- """Fetch metadata from IMDS using IMDS_VER_WANT API version.
-
- Falls back to IMDS_VER_MIN version if IMDS returns a 400 error code,
- indicating that IMDS_VER_WANT is unsupported.
-
- :return: Parsed metadata dictionary or empty dict on error.
- """
- LOG.info("Attempting IMDS api-version: %s", IMDS_VER_WANT)
- try:
- return get_metadata_from_imds(
- retries=retries,
- md_type=md_type,
- api_version=IMDS_VER_WANT,
- exc_cb=exc_cb,
- infinite=infinite,
- )
- except UrlError as error:
- LOG.info("UrlError with IMDS api-version: %s", IMDS_VER_WANT)
- # Fall back if HTTP code is 400, otherwise return empty dict.
- if error.code != 400:
- return {}
-
- log_msg = "Fall back to IMDS api-version: {}".format(IMDS_VER_MIN)
- report_diagnostic_event(log_msg, logger_func=LOG.info)
- try:
- return get_metadata_from_imds(
- retries=retries,
- md_type=md_type,
- api_version=IMDS_VER_MIN,
- exc_cb=exc_cb,
- infinite=infinite,
- )
- except UrlError as error:
- report_diagnostic_event(
- "Failed to fetch IMDS metadata: %s" % error,
- logger_func=LOG.error,
- )
- return {}
-
def get_instance_id(self):
if not self.metadata or "instance-id" not in self.metadata:
return self._iid()
@@ -1052,82 +984,18 @@ class DataSourceAzure(sources.DataSource):
primary nic, then we also get the expected total nic count from IMDS.
IMDS will process the request and send a response only for primary NIC.
"""
- is_primary = False
- expected_nic_count = -1
- imds_md = None
- metadata_poll_count = 0
- metadata_logging_threshold = 1
- expected_errors_count = 0
-
# For now, only a VM's primary NIC can contact IMDS and WireServer. If
# DHCP fails for a NIC, we have no mechanism to determine if the NIC is
# primary or secondary. In this case, retry DHCP until successful.
self._setup_ephemeral_networking(iface=ifname, timeout_minutes=20)
- # Retry polling network metadata for a limited duration only when the
- # calls fail due to network unreachable error or timeout.
- # This is because the platform drops packets going towards IMDS
- # when it is not a primary nic. If the calls fail due to other issues
- # like 410, 503 etc, then it means we are primary but IMDS service
- # is unavailable at the moment. Retry indefinitely in those cases
- # since we cannot move on without the network metadata. In the future,
- # all this will not be necessary, as a new dhcp option would tell
- # whether the nic is primary or not.
- def network_metadata_exc_cb(msg, exc):
- nonlocal expected_errors_count, metadata_poll_count
- nonlocal metadata_logging_threshold
-
- metadata_poll_count = metadata_poll_count + 1
-
- # Log when needed but back off exponentially to avoid exploding
- # the log file.
- if metadata_poll_count >= metadata_logging_threshold:
- metadata_logging_threshold *= 2
- report_diagnostic_event(
- "Ran into exception when attempting to reach %s "
- "after %d polls." % (msg, metadata_poll_count),
- logger_func=LOG.error,
- )
-
- if isinstance(exc, UrlError):
- report_diagnostic_event(
- "poll IMDS with %s failed. Exception: %s and code: %s"
- % (msg, exc.cause, exc.code),
- logger_func=LOG.error,
- )
-
- # Retry up to a certain limit for both timeout and network
- # unreachable errors.
- if exc.cause and isinstance(
- exc.cause, (requests.Timeout, requests.ConnectionError)
- ):
- expected_errors_count = expected_errors_count + 1
- return expected_errors_count <= 10
- return True
-
# Primary nic detection will be optimized in the future. The fact that
# primary nic is being attached first helps here. Otherwise each nic
# could add several seconds of delay.
- try:
- imds_md = self.get_imds_data_with_api_fallback(
- retries=0,
- md_type=MetadataType.NETWORK,
- exc_cb=network_metadata_exc_cb,
- infinite=True,
- )
- except Exception as e:
- LOG.warning(
- "Failed to get network metadata using nic %s. Attempt to "
- "contact IMDS failed with error %s. Assuming this is not the "
- "primary nic.",
- ifname,
- e,
- )
-
+ imds_md = self.get_metadata_from_imds()
if imds_md:
# Only primary NIC will get a response from IMDS.
LOG.info("%s is the primary nic", ifname)
- is_primary = True
# Set the expected nic count based on the response received.
expected_nic_count = len(imds_md["interface"])
@@ -1135,11 +1003,16 @@ class DataSourceAzure(sources.DataSource):
"Expected nic count: %d" % expected_nic_count,
logger_func=LOG.info,
)
- else:
- # If we are not the primary nic, then clean the dhcp context.
- self._teardown_ephemeral_networking()
+ return True, expected_nic_count
- return is_primary, expected_nic_count
+ # If we are not the primary nic, then clean the dhcp context.
+ LOG.warning(
+ "Failed to fetch IMDS metadata using nic %s. "
+ "Assuming this is not the primary nic.",
+ ifname,
+ )
+ self._teardown_ephemeral_networking()
+ return False, -1
@azure_ds_telemetry_reporter
def _wait_for_hot_attached_primary_nic(self, nl_sock):
@@ -1229,54 +1102,11 @@ class DataSourceAzure(sources.DataSource):
def _poll_imds(self):
"""Poll IMDS for the new provisioning data until we get a valid
response. Then return the returned JSON object."""
- url = "{}?api-version={}".format(
- MetadataType.REPROVISION_DATA.value, IMDS_VER_MIN
- )
- headers = {"Metadata": "true"}
nl_sock = None
report_ready = bool(
not os.path.isfile(self._reported_ready_marker_file)
)
- self.imds_logging_threshold = 1
- self.imds_poll_counter = 1
dhcp_attempts = 0
- reprovision_data = None
-
- def exc_cb(msg, exception):
- if isinstance(exception, UrlError):
- if exception.code in (404, 410):
- if self.imds_poll_counter == self.imds_logging_threshold:
- # Reducing the logging frequency as we are polling IMDS
- self.imds_logging_threshold *= 2
- LOG.debug(
- "Backing off logging threshold for the same "
- "exception to %d",
- self.imds_logging_threshold,
- )
- report_diagnostic_event(
- "poll IMDS with %s failed. "
- "Exception: %s and code: %s"
- % (msg, exception.cause, exception.code),
- logger_func=LOG.debug,
- )
- self.imds_poll_counter += 1
- return True
- else:
- # If we get an exception while trying to call IMDS, we call
- # DHCP and setup the ephemeral network to acquire a new IP.
- report_diagnostic_event(
- "poll IMDS with %s failed. Exception: %s and code: %s"
- % (msg, exception.cause, exception.code),
- logger_func=LOG.warning,
- )
- return False
-
- report_diagnostic_event(
- "poll IMDS failed with an unexpected exception: %s"
- % exception,
- logger_func=LOG.warning,
- )
- return False
if report_ready:
# Networking must be up for netlink to detect
@@ -1338,6 +1168,7 @@ class DataSourceAzure(sources.DataSource):
# Teardown old network configuration.
self._teardown_ephemeral_networking()
+ reprovision_data = None
while not reprovision_data:
if not self._is_ephemeral_networking_up():
dhcp_attempts += 1
@@ -1352,14 +1183,7 @@ class DataSourceAzure(sources.DataSource):
parent=azure_ds_reporter,
):
try:
- reprovision_data = readurl(
- url,
- timeout=IMDS_TIMEOUT_IN_SECONDS,
- headers=headers,
- exception_cb=exc_cb,
- infinite=True,
- log_req_resp=False,
- ).contents
+ reprovision_data = imds.fetch_reprovision_data()
except UrlError:
self._teardown_ephemeral_networking()
continue
@@ -1368,10 +1192,6 @@ class DataSourceAzure(sources.DataSource):
"attempted dhcp %d times after reuse" % dhcp_attempts,
logger_func=LOG.debug,
)
- report_diagnostic_event(
- "polled imds %d times after reuse" % self.imds_poll_counter,
- logger_func=LOG.debug,
- )
return reprovision_data
@@ -2089,96 +1909,6 @@ def _generate_network_config_from_fallback_config() -> dict:
@azure_ds_telemetry_reporter
-def get_metadata_from_imds(
- retries,
- md_type=MetadataType.ALL,
- api_version=IMDS_VER_MIN,
- exc_cb=imds_readurl_exception_callback,
- infinite=False,
-):
- """Query Azure's instance metadata service, returning a dictionary.
-
- For more info on IMDS:
- https://docs.microsoft.com/en-us/azure/virtual-machines/windows/instance-metadata-service
-
- @param retries: The number of retries of the IMDS_URL.
- @param md_type: Metadata type for IMDS request.
- @param api_version: IMDS api-version to use in the request.
-
- @return: A dict of instance metadata containing compute and network
- info.
- """
- kwargs = {
- "logfunc": LOG.debug,
- "msg": "Crawl of Azure Instance Metadata Service (IMDS)",
- "func": _get_metadata_from_imds,
- "args": (retries, exc_cb, md_type, api_version, infinite),
- }
- try:
- return util.log_time(**kwargs)
- except Exception as e:
- report_diagnostic_event(
- "exception while getting metadata: %s" % e,
- logger_func=LOG.warning,
- )
- raise
-
-
-@azure_ds_telemetry_reporter
-def _get_metadata_from_imds(
- retries,
- exc_cb,
- md_type=MetadataType.ALL,
- api_version=IMDS_VER_MIN,
- infinite=False,
-):
- url = "{}?api-version={}".format(md_type.value, api_version)
- headers = {"Metadata": "true"}
-
- # support for extended metadata begins with 2021-03-01
- if api_version >= IMDS_EXTENDED_VER_MIN and md_type == MetadataType.ALL:
- url = url + "&extended=true"
-
- try:
- response = readurl(
- url,
- timeout=IMDS_TIMEOUT_IN_SECONDS,
- headers=headers,
- retries=retries,
- exception_cb=exc_cb,
- infinite=infinite,
- )
- except Exception as e:
- # pylint:disable=no-member
- if isinstance(e, UrlError) and e.code == 400:
- raise
- else:
- report_diagnostic_event(
- "Ignoring IMDS instance metadata. "
- "Get metadata from IMDS failed: %s" % e,
- logger_func=LOG.warning,
- )
- return {}
- try:
- from json.decoder import JSONDecodeError
-
- json_decode_error = JSONDecodeError
- except ImportError:
- json_decode_error = ValueError
-
- try:
- return util.load_json(response.contents)
- except json_decode_error as e:
- report_diagnostic_event(
- "Ignoring non-json IMDS instance metadata response: %s. "
- "Loading non-json IMDS response failed: %s"
- % (response.contents, e),
- logger_func=LOG.warning,
- )
- return {}
-
-
-@azure_ds_telemetry_reporter
def maybe_remove_ubuntu_network_config_scripts(paths=None):
"""Remove Azure-specific ubuntu network config for non-primary nics.