diff options
Diffstat (limited to 'cloudinit/sources/azure/imds.py')
-rw-r--r-- | cloudinit/sources/azure/imds.py | 131 |
1 files changed, 75 insertions, 56 deletions
diff --git a/cloudinit/sources/azure/imds.py b/cloudinit/sources/azure/imds.py index 54fc9a05..5e4046d0 100644 --- a/cloudinit/sources/azure/imds.py +++ b/cloudinit/sources/azure/imds.py @@ -2,7 +2,6 @@ # # This file is part of cloud-init. See LICENSE file for license information. -import functools from typing import Dict import requests @@ -10,25 +9,69 @@ import requests from cloudinit import log as logging from cloudinit import util from cloudinit.sources.helpers.azure import report_diagnostic_event -from cloudinit.url_helper import UrlError, readurl, retry_on_url_exc +from cloudinit.url_helper import UrlError, readurl LOG = logging.getLogger(__name__) IMDS_URL = "http://169.254.169.254/metadata" -_readurl_exception_callback = functools.partial( - retry_on_url_exc, - retry_codes=( - 404, # not found (yet) - 410, # gone / unavailable (yet) - 429, # rate-limited/throttled - 500, # server error - ), - retry_instances=( - requests.ConnectionError, - requests.Timeout, - ), -) + +class ReadUrlRetryHandler: + def __init__( + self, + *, + retry_codes=( + 404, # not found (yet) + 410, # gone / unavailable (yet) + 429, # rate-limited/throttled + 500, # server error + ), + max_connection_errors: int = 10, + logging_backoff: float = 1.0, + ) -> None: + self.logging_backoff = logging_backoff + self.max_connection_errors = max_connection_errors + self.retry_codes = retry_codes + self._logging_threshold = 1.0 + self._request_count = 0 + + def exception_callback(self, req_args, exception) -> bool: + self._request_count += 1 + if not isinstance(exception, UrlError): + report_diagnostic_event( + "Polling IMDS failed with unexpected exception: %r" + % (exception), + logger_func=LOG.warning, + ) + return False + + log = True + retry = True + + # Check for connection errors which may occur early boot, but + # are otherwise indicative that we are not connecting with the + # primary NIC. + if isinstance( + exception.cause, (requests.ConnectionError, requests.Timeout) + ): + self.max_connection_errors -= 1 + if self.max_connection_errors < 0: + retry = False + elif exception.code not in self.retry_codes: + retry = False + + if self._request_count >= self._logging_threshold: + self._logging_threshold *= self.logging_backoff + else: + log = False + + if log or not retry: + report_diagnostic_event( + "Polling IMDS failed attempt %d with exception: %r" + % (self._request_count, exception), + logger_func=LOG.info, + ) + return retry def _fetch_url( @@ -38,11 +81,12 @@ def _fetch_url( :raises UrlError: on error fetching metadata. """ + handler = ReadUrlRetryHandler() try: response = readurl( url, - exception_cb=_readurl_exception_callback, + exception_cb=handler.exception_callback, headers={"Metadata": "true"}, infinite=False, log_req_resp=log_response, @@ -61,13 +105,14 @@ def _fetch_url( def _fetch_metadata( url: str, + retries: int = 10, ) -> Dict: """Fetch IMDS metadata. :raises UrlError: on error fetching metadata. :raises ValueError: on error parsing metadata. """ - metadata = _fetch_url(url) + metadata = _fetch_url(url, retries=retries) try: return util.load_json(metadata) @@ -79,7 +124,7 @@ def _fetch_metadata( raise -def fetch_metadata_with_api_fallback() -> Dict: +def fetch_metadata_with_api_fallback(retries: int = 10) -> Dict: """Fetch extended metadata, falling back to non-extended as required. :raises UrlError: on error fetching metadata. @@ -87,7 +132,7 @@ def fetch_metadata_with_api_fallback() -> Dict: """ try: url = IMDS_URL + "/instance?api-version=2021-08-01&extended=true" - return _fetch_metadata(url) + return _fetch_metadata(url, retries=retries) except UrlError as error: if error.code == 400: report_diagnostic_event( @@ -95,7 +140,7 @@ def fetch_metadata_with_api_fallback() -> Dict: logger_func=LOG.warning, ) url = IMDS_URL + "/instance?api-version=2019-06-01" - return _fetch_metadata(url) + return _fetch_metadata(url, retries=retries) raise @@ -106,43 +151,17 @@ def fetch_reprovision_data() -> bytes: """ url = IMDS_URL + "/reprovisiondata?api-version=2019-06-01" - logging_threshold = 1 - poll_counter = 0 - - def exception_callback(msg, exception): - nonlocal logging_threshold - nonlocal poll_counter - - poll_counter += 1 - if not isinstance(exception, UrlError): - report_diagnostic_event( - "Polling IMDS failed with unexpected exception: %r" - % (exception), - logger_func=LOG.warning, - ) - return False - - log = True - retry = False - if exception.code in (404, 410): - retry = True - if poll_counter >= logging_threshold: - # Exponential back-off on logging. - logging_threshold *= 2 - else: - log = False - - if log: - report_diagnostic_event( - "Polling IMDS failed with exception: %r count: %d" - % (exception, poll_counter), - logger_func=LOG.info, - ) - return retry - + handler = ReadUrlRetryHandler( + logging_backoff=2.0, + max_connection_errors=0, + retry_codes=( + 404, + 410, + ), + ) response = readurl( url, - exception_cb=exception_callback, + exception_cb=handler.exception_callback, headers={"Metadata": "true"}, infinite=True, log_req_resp=False, @@ -150,7 +169,7 @@ def fetch_reprovision_data() -> bytes: ) report_diagnostic_event( - f"Polled IMDS {poll_counter+1} time(s)", + f"Polled IMDS {handler._request_count+1} time(s)", logger_func=LOG.debug, ) return response.contents |