diff options
Diffstat (limited to 'cloudinit/sources/DataSourceAzure.py')
-rwxr-xr-x | cloudinit/sources/DataSourceAzure.py | 195 |
1 files changed, 147 insertions, 48 deletions
diff --git a/cloudinit/sources/DataSourceAzure.py b/cloudinit/sources/DataSourceAzure.py index 04ff2131..6cae9e82 100755 --- a/cloudinit/sources/DataSourceAzure.py +++ b/cloudinit/sources/DataSourceAzure.py @@ -78,17 +78,15 @@ AGENT_SEED_DIR = '/var/lib/waagent' # In the event where the IMDS primary server is not # available, it takes 1s to fallback to the secondary one IMDS_TIMEOUT_IN_SECONDS = 2 -IMDS_URL = "http://169.254.169.254/metadata/" -IMDS_VER = "2019-06-01" -IMDS_VER_PARAM = "api-version={}".format(IMDS_VER) +IMDS_URL = "http://169.254.169.254/metadata" +IMDS_VER_MIN = "2019-06-01" +IMDS_VER_WANT = "2020-09-01" class metadata_type(Enum): - compute = "{}instance?{}".format(IMDS_URL, IMDS_VER_PARAM) - network = "{}instance/network?{}".format(IMDS_URL, - IMDS_VER_PARAM) - reprovisiondata = "{}reprovisiondata?{}".format(IMDS_URL, - IMDS_VER_PARAM) + compute = "{}/instance".format(IMDS_URL) + network = "{}/instance/network".format(IMDS_URL) + reprovisiondata = "{}/reprovisiondata".format(IMDS_URL) PLATFORM_ENTROPY_SOURCE = "/sys/firmware/acpi/tables/OEM0" @@ -270,7 +268,7 @@ BUILTIN_DS_CONFIG = { } # RELEASE_BLOCKER: Xenial and earlier apply_network_config default is False -BUILTIN_CLOUD_CONFIG = { +BUILTIN_CLOUD_EPHEMERAL_DISK_CONFIG = { 'disk_setup': { 'ephemeral0': {'table_type': 'gpt', 'layout': [100], @@ -349,6 +347,8 @@ class DataSourceAzure(sources.DataSource): self.update_events['network'].add(EventType.BOOT) self._ephemeral_dhcp_ctx = None + self.failed_desired_api_version = False + def __str__(self): root = sources.DataSource.__str__(self) return "%s [seed=%s]" % (root, self.seed) @@ -520,8 +520,10 @@ class DataSourceAzure(sources.DataSource): self._wait_for_all_nics_ready() ret = self._reprovision() - imds_md = get_metadata_from_imds( - self.fallback_interface, retries=10) + imds_md = self.get_imds_data_with_api_fallback( + self.fallback_interface, + retries=10 + ) (md, userdata_raw, cfg, files) = ret self.seed = cdev crawled_data.update({ @@ -618,8 +620,26 @@ class DataSourceAzure(sources.DataSource): maybe_remove_ubuntu_network_config_scripts() # Process crawled data and augment with various config defaults - self.cfg = util.mergemanydict( - [crawled_data['cfg'], BUILTIN_CLOUD_CONFIG]) + + # Only merge in default cloud config related to the ephemeral disk + # if the ephemeral disk exists + devpath = RESOURCE_DISK_PATH + if os.path.exists(devpath): + report_diagnostic_event( + "Ephemeral resource disk '%s' exists. " + "Merging default Azure cloud ephemeral disk configs." + % devpath, + logger_func=LOG.debug) + self.cfg = util.mergemanydict( + [crawled_data['cfg'], BUILTIN_CLOUD_EPHEMERAL_DISK_CONFIG]) + else: + report_diagnostic_event( + "Ephemeral resource disk '%s' does not exist. " + "Not merging default Azure cloud ephemeral disk configs." + % devpath, + logger_func=LOG.debug) + self.cfg = crawled_data['cfg'] + self._metadata_imds = crawled_data['metadata']['imds'] self.metadata = util.mergemanydict( [crawled_data['metadata'], DEFAULT_METADATA]) @@ -634,6 +654,57 @@ class DataSourceAzure(sources.DataSource): self.ds_cfg['data_dir'], crawled_data['files'], dirmode=0o700) return True + @azure_ds_telemetry_reporter + def get_imds_data_with_api_fallback( + self, + fallback_nic, + retries, + md_type=metadata_type.compute): + """ + Wrapper for get_metadata_from_imds so that we can have flexibility + in which IMDS api-version we use. If a particular instance of IMDS + does not have the api version that is desired, we want to make + this fault tolerant and fall back to a good known minimum api + version. + """ + + if not self.failed_desired_api_version: + for _ in range(retries): + try: + LOG.info( + "Attempting IMDS api-version: %s", + IMDS_VER_WANT + ) + return get_metadata_from_imds( + fallback_nic=fallback_nic, + retries=0, + md_type=md_type, + api_version=IMDS_VER_WANT + ) + except UrlError as err: + LOG.info( + "UrlError with IMDS api-version: %s", + IMDS_VER_WANT + ) + if err.code == 400: + log_msg = "Fall back to IMDS api-version: {}".format( + IMDS_VER_MIN + ) + report_diagnostic_event( + log_msg, + logger_func=LOG.info + ) + self.failed_desired_api_version = True + break + + LOG.info("Using IMDS api-version: %s", IMDS_VER_MIN) + return get_metadata_from_imds( + fallback_nic=fallback_nic, + retries=retries, + md_type=md_type, + api_version=IMDS_VER_MIN + ) + def device_name_to_device(self, name): return self.ds_cfg['disk_aliases'].get(name) @@ -651,6 +722,10 @@ class DataSourceAzure(sources.DataSource): LOG.debug('Retrieving public SSH keys') ssh_keys = [] try: + raise KeyError( + "Not using public SSH keys from IMDS" + ) + # pylint:disable=unreachable ssh_keys = [ public_key['keyData'] for public_key @@ -679,10 +754,18 @@ class DataSourceAzure(sources.DataSource): def _iid(self, previous=None): prev_iid_path = os.path.join( self.paths.get_cpath('data'), 'instance-id') - iid = dmi.read_dmi_data('system-uuid') + # Older kernels than 4.15 will have UPPERCASE product_uuid. + # We don't want Azure to react to an UPPER/lower difference as a new + # instance id as it rewrites SSH host keys. + # LP: #1835584 + iid = dmi.read_dmi_data('system-uuid').lower() if os.path.exists(prev_iid_path): previous = util.load_file(prev_iid_path).strip() - if is_byte_swapped(previous, iid): + if previous.lower() == iid: + # If uppercase/lowercase equivalent, return the previous value + # to avoid new instance id. + return previous + if is_byte_swapped(previous.lower(), iid): return previous return iid @@ -850,10 +933,11 @@ class DataSourceAzure(sources.DataSource): # primary nic is being attached first helps here. Otherwise each nic # could add several seconds of delay. try: - imds_md = get_metadata_from_imds( + imds_md = self.get_imds_data_with_api_fallback( ifname, 5, - metadata_type.network) + metadata_type.network + ) except Exception as e: LOG.warning( "Failed to get network metadata using nic %s. Attempt to " @@ -983,10 +1067,14 @@ class DataSourceAzure(sources.DataSource): if nl_sock: nl_sock.close() + @azure_ds_telemetry_reporter def _poll_imds(self): """Poll IMDS for the new provisioning data until we get a valid response. Then return the returned JSON object.""" - url = metadata_type.reprovisiondata.value + url = "{}?api-version={}".format( + metadata_type.reprovisiondata.value, + IMDS_VER_MIN + ) headers = {"Metadata": "true"} nl_sock = None report_ready = bool(not os.path.isfile(REPORTED_READY_MARKER_FILE)) @@ -1271,6 +1359,10 @@ class DataSourceAzure(sources.DataSource): pubkey_info = None try: + raise KeyError( + "Not using public SSH keys from IMDS" + ) + # pylint:disable=unreachable public_keys = self.metadata['imds']['compute']['publicKeys'] LOG.debug( 'Successfully retrieved %s key(s) from IMDS', @@ -1451,26 +1543,17 @@ def can_dev_be_reformatted(devpath, preserve_ntfs): @azure_ds_telemetry_reporter -def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, maxwait=120, +def address_ephemeral_resize(devpath=RESOURCE_DISK_PATH, is_new_instance=False, preserve_ntfs=False): - # wait for ephemeral disk to come up - naplen = .2 - with events.ReportEventStack( - name="wait-for-ephemeral-disk", - description="wait for ephemeral disk", - parent=azure_ds_reporter - ): - missing = util.wait_for_files([devpath], - maxwait=maxwait, - naplen=naplen, - log_pre="Azure ephemeral disk: ") - - if missing: - report_diagnostic_event( - "ephemeral device '%s' did not appear after %d seconds." % - (devpath, maxwait), - logger_func=LOG.warning) - return + if not os.path.exists(devpath): + report_diagnostic_event( + "Ephemeral resource disk '%s' does not exist." % devpath, + logger_func=LOG.debug) + return + else: + report_diagnostic_event( + "Ephemeral resource disk '%s' exists." % devpath, + logger_func=LOG.debug) result = False msg = None @@ -1969,6 +2052,7 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: netconfig = {'version': 2, 'ethernets': {}} network_metadata = imds_metadata['network'] for idx, intf in enumerate(network_metadata['interface']): + has_ip_address = False # First IPv4 and/or IPv6 address will be obtained via DHCP. # Any additional IPs of each type will be set as static # addresses. @@ -1978,6 +2062,11 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: 'dhcp6': False} for addr_type in ('ipv4', 'ipv6'): addresses = intf.get(addr_type, {}).get('ipAddress', []) + # If there are no available IP addresses, then we don't + # want to add this interface to the generated config. + if not addresses: + continue + has_ip_address = True if addr_type == 'ipv4': default_prefix = '24' else: @@ -1998,7 +2087,7 @@ def _generate_network_config_from_imds_metadata(imds_metadata) -> dict: dev_config['addresses'].append( '{ip}/{prefix}'.format( ip=privateIp, prefix=netPrefix)) - if dev_config: + if dev_config and has_ip_address: mac = ':'.join(re.findall(r'..', intf['macAddress'])) dev_config.update({ 'match': {'macaddress': mac.lower()}, @@ -2027,7 +2116,8 @@ def _generate_network_config_from_fallback_config() -> dict: @azure_ds_telemetry_reporter def get_metadata_from_imds(fallback_nic, retries, - md_type=metadata_type.compute): + md_type=metadata_type.compute, + api_version=IMDS_VER_MIN): """Query Azure's instance metadata service, returning a dictionary. If network is not up, setup ephemeral dhcp on fallback_nic to talk to the @@ -2037,13 +2127,16 @@ def get_metadata_from_imds(fallback_nic, @param fallback_nic: String. The name of the nic which requires active network in order to query IMDS. @param retries: The number of retries of the IMDS_URL. + @param md_type: Metadata type for IMDS request. + @param api_version: IMDS api-version to use in the request. @return: A dict of instance metadata containing compute and network info. """ kwargs = {'logfunc': LOG.debug, 'msg': 'Crawl of Azure Instance Metadata Service (IMDS)', - 'func': _get_metadata_from_imds, 'args': (retries, md_type,)} + 'func': _get_metadata_from_imds, + 'args': (retries, md_type, api_version,)} if net.is_up(fallback_nic): return util.log_time(**kwargs) else: @@ -2059,20 +2152,26 @@ def get_metadata_from_imds(fallback_nic, @azure_ds_telemetry_reporter -def _get_metadata_from_imds(retries, md_type=metadata_type.compute): - - url = md_type.value +def _get_metadata_from_imds( + retries, + md_type=metadata_type.compute, + api_version=IMDS_VER_MIN): + url = "{}?api-version={}".format(md_type.value, api_version) headers = {"Metadata": "true"} try: response = readurl( url, timeout=IMDS_TIMEOUT_IN_SECONDS, headers=headers, retries=retries, exception_cb=retry_on_url_exc) except Exception as e: - report_diagnostic_event( - 'Ignoring IMDS instance metadata. ' - 'Get metadata from IMDS failed: %s' % e, - logger_func=LOG.warning) - return {} + # pylint:disable=no-member + if isinstance(e, UrlError) and e.code == 400: + raise + else: + report_diagnostic_event( + 'Ignoring IMDS instance metadata. ' + 'Get metadata from IMDS failed: %s' % e, + logger_func=LOG.warning) + return {} try: from json.decoder import JSONDecodeError json_decode_error = JSONDecodeError |