summaryrefslogtreecommitdiff
path: root/ironic/drivers/modules/irmc/raid.py
blob: 26737ea11e3d083ea51c7f164d905d3a46ef06a5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
# Copyright 2018 FUJITSU LIMITED
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may
# not use this file except in compliance with the License. You may obtain
# a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.

"""
iRMC RAID specific methods
"""
from ironic_lib import metrics_utils
from oslo_log import log as logging
from oslo_utils import importutils

from ironic.common import exception
from ironic.common import raid as raid_common
from ironic.common import states
from ironic.conductor import periodics
from ironic.conductor import utils as manager_utils
from ironic import conf
from ironic.drivers import base
from ironic.drivers.modules import deploy_utils
from ironic.drivers.modules.irmc import common as irmc_common

# python-scciclient is an optional dependency and is loaded via try_import.
# NOTE(review): per oslo.utils, try_import returns None when the module
# cannot be imported -- callers in this file use ``client`` unconditionally.
client = importutils.try_import('scciclient.irmc')

LOG = logging.getLogger(__name__)
CONF = conf.CONF

METRICS = metrics_utils.get_metrics_logger(__name__)

# RAID levels accepted by this driver, keyed by the level as a string
# (create_configuration strips any '+' so e.g. '1+0' is looked up as '10').
#
# For each level:
#   min_disks / max_disks -- bounds on the number of physical disks of a
#       logical disk, checked in _validate_physical_disks.
#   factor -- number of disks subtracted from the disk count when
#       _validate_logical_drive_capacity estimates the usable capacity
#       (RAID 10 with explicit physical disks uses half the disk count
#       there instead of this factor).
RAID_LEVELS = {
    '0': {
        'min_disks': 1,
        'max_disks': 1000,
        'factor': 0,
    },
    '1': {
        'min_disks': 2,
        'max_disks': 2,
        'factor': 1,
    },
    '5': {
        'min_disks': 3,
        'max_disks': 1000,
        'factor': 1,
    },
    '6': {
        'min_disks': 4,
        'max_disks': 1000,
        'factor': 2,
    },
    '10': {
        'min_disks': 4,
        'max_disks': 1000,
        'factor': 2,
    },
    '50': {
        'min_disks': 6,
        'max_disks': 1000,
        'factor': 2,
    }
}

# FGI (foreground initialization) progress markers.
# RAID_COMPLETING is returned by _get_fgi_status while the RAID report is not
# ready yet; RAID_COMPLETED and RAID_FAILED are the values written to
# raid_config['fgi_status'] by the periodic task in IRMCRAID.
RAID_COMPLETING = 'completing'
RAID_COMPLETED = 'completed'
RAID_FAILED = 'failed'


def _get_raid_adapter(node):
    """Fetch the RAID adapter profile of a node through its iRMC.

    :param node: an ironic node object.
    :returns: the RAID adapter data as returned by python-scciclient's
        ``elcm.get_raid_adapter``.
    :raises: IRMCOperationError if python-scciclient reports that no RAID
        profile exists (``ELCMProfileNotFound``).
    """
    driver_info = node.driver_info
    LOG.info('iRMC driver is gathering RAID adapter info for node %s',
             node.uuid)
    try:
        adapter = client.elcm.get_raid_adapter(driver_info)
    except client.elcm.ELCMProfileNotFound:
        raise exception.IRMCOperationError(
            operation='RAID config',
            error='Cannot find any RAID profile in "%s"' % node.uuid)
    return adapter


def _get_fgi_status(report, node_uuid):
    """Read the FGI (foreground initialization) status from an SCCI report.

    :param report: SCCI report information.
    :param node_uuid: UUID of the node, used only in the warning log.
    :returns: the value of ``scci.get_raid_fgi_status`` on success;
        ``None`` when the client raises SCCIInvalidInputError (a warning
        is logged); ``RAID_COMPLETING`` when it raises SCCIRAIDNotReady.
    """
    try:
        fgi_status = client.scci.get_raid_fgi_status(report)
    except client.scci.SCCIInvalidInputError:
        LOG.warning('ServerViewRAID not available in %(node)s',
                    {'node': node_uuid})
        return None
    except client.scci.SCCIRAIDNotReady:
        # The report carries no RAID information yet; keep polling.
        return RAID_COMPLETING
    return fgi_status


def _get_physical_disk(node):
    """Map the physical disk slots of a node's RAID controller to disk types.

    Only the first RAID adapter (``RAIDAdapter`` index 0) of the profile is
    inspected.

    :param node: an ironic node object.
    :returns: dict of ``{slot: type}`` built from the adapter's
        ``PhysicalDisk`` entries; an empty dict when the adapter has no
        ``PhysicalDisks`` section.
    """
    adapter_profile = _get_raid_adapter(node)
    adapters = adapter_profile['Server']['HWConfigurationIrmc']['Adapters']
    disks_section = adapters['RAIDAdapter'][0]['PhysicalDisks']

    if not disks_section:
        return {}

    return {entry['Slot']: entry['Type']
            for entry in disks_section['PhysicalDisk']}


def _create_raid_adapter(node):
    """Apply the node's target RAID configuration through the iRMC.

    :param node: an ironic node object; its ``driver_info`` and
        ``target_raid_config`` are handed to python-scciclient.
    :returns: the value returned by ``elcm.create_raid_configuration``.
    :raises: IRMCOperationError if python-scciclient raises
        ELCMProfileNotFound or SCCIClientError.
    """
    driver_info = node.driver_info
    raid_target = node.target_raid_config

    try:
        outcome = client.elcm.create_raid_configuration(driver_info,
                                                        raid_target)
    except client.elcm.ELCMProfileNotFound as not_found:
        LOG.error('iRMC driver failed with profile not found for node '
                  '%(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': not_found})
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=not_found)
    except client.scci.SCCIClientError as scci_error:
        LOG.error('iRMC driver failed to create raid adapter info for node '
                  '%(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': scci_error})
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=scci_error)
    return outcome


def _delete_raid_adapter(node):
    """Remove the RAID configuration of a node through its iRMC.

    :param node: an ironic node object; its ``driver_info`` is handed to
        python-scciclient.
    :raises: IRMCOperationError if python-scciclient raises SCCIClientError.
    """
    driver_info = node.driver_info

    try:
        client.elcm.delete_raid_configuration(driver_info)
    except client.scci.SCCIClientError as scci_error:
        LOG.error('iRMC driver failed to delete RAID configuration '
                  'for node %(node_uuid)s. Reason: %(error)s.',
                  {'node_uuid': node.uuid, 'error': scci_error})
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=scci_error)


def _commit_raid_config(task):
    """Record the RAID layout reported by the iRMC on the node.

    Reads the profile of the first RAID adapter, stores a summary of it in
    ``node.raid_config`` (one ``controller`` entry, one ``irmc_raid_info``
    entry per logical drive and one ``physical_drives`` entry per physical
    disk, all under ``logical_disks``), passes it to
    ``raid_common.update_raid_info`` and flags the running step as
    asynchronous.

    :param task: a TaskManager instance containing the node to act on.
    :returns: states.CLEANWAIT
    :raises: IRMCOperationError if the RAID adapter profile cannot be found.
    """

    node = task.node
    node_uuid = task.node.uuid

    raid_adapter = _get_raid_adapter(node)

    # Only the first RAID adapter is reported.
    raid_adapter_info = raid_adapter['Server']['HWConfigurationIrmc'][
        'Adapters']['RAIDAdapter'][0]

    logical_disks = [{'controller': raid_adapter_info['@AdapterId']}]

    for logical_drive in raid_adapter_info['LogicalDrives']['LogicalDrive']:
        logical_disks.append({'irmc_raid_info': {
            'logical_drive_number': logical_drive['@Number'],
            'raid_level': logical_drive['RaidLevel'],
            'name': logical_drive['Name'],
            # This key used to be ' size' (stray leading space).
            'size': logical_drive['Size']}})

    for physical_drive in raid_adapter_info['PhysicalDisks']['PhysicalDisk']:
        logical_disks.append({'physical_drives': {
            'physical_drive': physical_drive}})

    node.raid_config = {'logical_disks': logical_disks}

    raid_common.update_raid_info(node, node.raid_config)
    LOG.info('RAID config is created successfully on node %s',
             node_uuid)

    # The step continues asynchronously: the periodic FGI status check
    # resumes or fails cleaning later.
    deploy_utils.set_async_step_flags(
        task.node,
        reboot=True,
        skip_current_step=True,
        polling=True)

    return states.CLEANWAIT


def _validate_logical_drive_capacity(disk, valid_disk_slots):
    """Check that a requested logical disk size fits the available disks.

    The usable capacity is estimated from the smallest disk size involved:

    * with explicit ``physical_disks``: smallest selected disk multiplied by
      half the number of selected disks for RAID level '10', otherwise by
      the number of selected disks minus the level's ``factor``;
    * without ``physical_disks``: smallest disk of the adapter multiplied by
      the level's ``min_disks`` minus its ``factor``.

    :param disk: a logical disk dict; ``raid_level`` and ``size_gb`` are
        read, ``physical_disks`` is optional. ``size_gb`` is replaced by
        ``'MAX'`` in place when it equals the estimated capacity.
    :param valid_disk_slots: the ``PhysicalDisks`` section of the RAID
        adapter profile, i.e. a dict whose ``PhysicalDisk`` entries carry
        ``@Number`` and ``Size['#text']``.
    :raises: InvalidParameterValue if ``size_gb`` exceeds the estimated
        capacity.
    """
    size_by_number = {}
    every_size = []

    for entry in valid_disk_slots['PhysicalDisk']:
        number = entry['@Number']
        capacity = entry['Size']['#text']
        size_by_number[number] = capacity
        every_size.append(capacity)

    level = disk['raid_level']
    factor = RAID_LEVELS[level]['factor']

    requested_disks = disk.get('physical_disks')
    if requested_disks:
        chosen_sizes = [size_by_number[number] for number in requested_disks]
        if level == '10':
            # Mirrored stripes: half of the selected disks hold data.
            multiplier = len(chosen_sizes) / 2
        else:
            multiplier = len(chosen_sizes) - factor
        usable = min(chosen_sizes) * multiplier
    else:
        usable = min(every_size) * (RAID_LEVELS[level]['min_disks'] - factor)

    wanted = disk['size_gb']
    if wanted > usable:
        raise exception.InvalidParameterValue(
            'Insufficient disk capacity with %s GB' % wanted)

    if wanted == usable:
        disk['size_gb'] = 'MAX'


def _validate_physical_disks(node, logical_disks):
    """Validate physical disks on a RAID configuration.

    For every logical disk this checks that the RAID level is supported,
    that the controller still has enough disks left for the level's minimum,
    and -- when ``physical_disks`` is given -- that the number of disks is
    within the level's bounds, that every disk exists on the first RAID
    adapter, that all disks of one logical disk share the same type and that
    no disk is used twice. Unless ``size_gb`` is ``'MAX'`` the requested
    size is checked by ``_validate_logical_drive_capacity``, which may
    rewrite ``size_gb`` in place.

    :param node: an ironic node object.
    :param logical_disks: RAID info to set RAID configuration
    :raises: IRMCOperationError on an error.
    :raises: InvalidParameterValue if more physical disks are requested than
        the RAID level allows, or if the requested size does not fit.
    """
    raid_adapter = _get_raid_adapter(node)
    # Check the profile before anything subscripts it.
    if raid_adapter is None:
        reason = ('Cannot find any raid profile in "%s"' % node.uuid)
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=reason)
    physical_disk_dict = _get_physical_disk(node)
    if physical_disk_dict is None:
        reason = ('Cannot find any physical disks in "%s"' % node.uuid)
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=reason)
    valid_disks = raid_adapter['Server']['HWConfigurationIrmc'][
        'Adapters']['RAIDAdapter'][0]['PhysicalDisks']
    if valid_disks is None:
        reason = ('Cannot find any HDD over in the node "%s"' % node.uuid)
        raise exception.IRMCOperationError(operation='RAID config',
                                           error=reason)
    valid_disk_slots = [slot['Slot'] for slot in valid_disks['PhysicalDisk']]
    number_of_valid_disks = len(valid_disk_slots)
    used_valid_disk_slots = set()

    for disk in logical_disks:
        # Check raid_level value in the target_raid_config of node
        if disk.get('raid_level') not in RAID_LEVELS:
            reason = ('RAID level is not supported: "%s"'
                      % disk.get('raid_level'))
            raise exception.IRMCOperationError(operation='RAID config',
                                               error=reason)

        min_disk_value = RAID_LEVELS[disk['raid_level']]['min_disks']
        max_disk_value = RAID_LEVELS[disk['raid_level']]['max_disks']

        # Every logical disk reserves at least the minimum number of disks
        # of its RAID level out of the disks the controller offers.
        number_of_valid_disks -= min_disk_value
        if number_of_valid_disks < 0:
            reason = ('Physical disks do not enough slots for raid "%s"'
                      % disk['raid_level'])
            raise exception.IRMCOperationError(operation='RAID config',
                                               error=reason)

        if 'physical_disks' in disk:
            type_of_disks = []
            number_of_physical_disks = len(disk['physical_disks'])
            # Check number of physical disks along with raid level
            if number_of_physical_disks > max_disk_value:
                reason = ("Too many disks requested for RAID level %(level)s, "
                          "maximum is %(max)s"
                          % {'level': disk['raid_level'],
                             'max': max_disk_value})
                raise exception.InvalidParameterValue(err=reason)
            if number_of_physical_disks < min_disk_value:
                reason = ("Not enough disks requested for RAID level "
                          "%(level)s, minimum is %(min)s "
                          % {'level': disk['raid_level'],
                             'min': min_disk_value})
                raise exception.IRMCOperationError(operation='RAID config',
                                                   error=reason)
            # Check physical disks in valid disk slots
            for phys_disk in disk['physical_disks']:
                slot = int(phys_disk)
                if slot not in valid_disk_slots:
                    reason = ("Incorrect physical disk %(disk)s, correct are "
                              "%(valid)s"
                              % {'disk': phys_disk,
                                 'valid': valid_disk_slots})
                    raise exception.IRMCOperationError(operation='RAID config',
                                                       error=reason)
                # All disks of one logical disk must match the type of the
                # first disk listed.
                type_of_disks.append(physical_disk_dict[slot])
                if physical_disk_dict[slot] != type_of_disks[0]:
                    reason = ('Cannot create RAID configuration with '
                              'different hard drives type %s'
                              % physical_disk_dict[slot])
                    raise exception.IRMCOperationError(operation='RAID config',
                                                       error=reason)
                # Check physical disk values with used disk slots
                if slot in used_valid_disk_slots:
                    reason = ("Disk %s is already used in a RAID configuration"
                              % phys_disk)
                    raise exception.IRMCOperationError(operation='RAID config',
                                                       error=reason)

                used_valid_disk_slots.add(slot)

        if disk['size_gb'] != 'MAX':
            # Validate size_gb value input
            _validate_logical_drive_capacity(disk, valid_disks)


class IRMCRAID(base.RAIDInterface):
    """RAID interface that configures RAID through the node's iRMC.

    ``create_configuration`` and ``delete_configuration`` are manual clean
    steps (priority 0). A periodic task polls the FGI (foreground
    initialization) status of nodes in CLEANWAIT and resumes or fails
    cleaning accordingly.
    """

    def get_properties(self):
        """Return the properties of the interface."""
        return irmc_common.COMMON_PROPERTIES

    @METRICS.timer('IRMCRAID.create_configuration')
    @base.clean_step(priority=0, argsinfo={
        'create_root_volume': {
            'description': ('This specifies whether to create the root '
                            'volume. '
                            'Defaults to `True`.'
                            ),
            'required': False
        },
        'create_nonroot_volumes': {
            'description': ('This specifies whether to create the non-root '
                            'volumes. '
                            'Defaults to `True`.'
                            ),
            'required': False
        }
    })
    def create_configuration(self, task,
                             create_root_volume=True,
                             create_nonroot_volumes=True):
        """Create the RAID configuration.

        This method creates the RAID configuration on the given node.

        NOTE(review): ``create_root_volume`` and ``create_nonroot_volumes``
        are accepted but not consulted by this implementation; the whole
        ``target_raid_config`` is always applied.

        :param task: a TaskManager instance containing the node to act on.
        :param create_root_volume: If True, a root volume is created
            during RAID configuration. Otherwise, no root volume is
            created. Default is True.
        :param create_nonroot_volumes: If True, non-root volumes are
            created. If False, no non-root volumes are created. Default
            is True.
        :returns: states.CLEANWAIT if RAID configuration is in progress
            asynchronously.
        :raises: MissingParameterValue, if node.target_raid_config is missing
            or empty.
        :raises: IRMCOperationError on an error from scciclient
        """

        node = task.node

        if not node.target_raid_config:
            raise exception.MissingParameterValue(
                'Missing the target_raid_config in node %s' % node.uuid)

        # copy() is shallow: the logical disk dicts normalised below are the
        # ones held by the target_raid_config object that was copied.
        target_raid_config = node.target_raid_config.copy()

        logical_disks = target_raid_config['logical_disks']
        for log_disk in logical_disks:
            if log_disk.get('raid_level'):
                # Normalise e.g. '1+0' or 5 to the RAID_LEVELS keys
                # ('10', '5').
                log_disk['raid_level'] = str(
                    log_disk['raid_level']).replace('+', '')

        # Validate physical disks on Fujitsu BM Server
        _validate_physical_disks(node, logical_disks)

        # Executing raid configuration on Fujitsu BM Server
        _create_raid_adapter(node)

        return _commit_raid_config(task)

    @METRICS.timer('IRMCRAID.delete_configuration')
    @base.clean_step(priority=0)
    def delete_configuration(self, task):
        """Delete the RAID configuration.

        Removes the RAID configuration through the iRMC, then clears and
        saves ``node.raid_config``.

        :param task: a TaskManager instance containing the node to act on.
        :returns: None.
        :raises: IRMCOperationError on an error from scciclient
        """
        node = task.node
        node_uuid = task.node.uuid

        # Default delete everything raid configuration in BM Server
        _delete_raid_adapter(node)
        node.raid_config = {}
        node.save()
        LOG.info('RAID config is deleted successfully on node %(node_id)s. '
                 'RAID config will clear and return %(cfg)s value',
                 {'node_id': node_uuid, 'cfg': node.raid_config})

    @METRICS.timer('IRMCRAID._query_raid_config_fgi_status')
    @periodics.node_periodic(
        purpose='checking async RAID configuration tasks',
        spacing=CONF.irmc.query_raid_config_fgi_status_interval,
        filters={'reserved': False, 'provision_state': states.CLEANWAIT,
                 'maintenance': False},
        predicate_extra_fields=['raid_config'],
        predicate=lambda n: (
            n.raid_config and not n.raid_config.get('fgi_status')
        ),
    )
    def _query_raid_config_fgi_status(self, task, manager, context):
        """Periodic tasks to check the progress of running RAID config.

        Selected nodes are those in CLEANWAIT with a ``raid_config`` that has
        no ``fgi_status`` yet. The outcome is stored under
        ``raid_config['fgi_status']``: RAID_FAILED (cleaning is failed) when
        the report cannot be fetched or carries no FGI status,
        RAID_COMPLETED (cleaning is resumed) when every FGI status is
        ``'Idle'``. Otherwise nothing is recorded and the node is checked
        again on the next run.

        :param task: a TaskManager instance containing the node to act on.
        :param manager: the conductor manager (unused here).
        :param context: the request context (unused here).
        """
        node = task.node
        node_uuid = task.node.uuid
        if task.node.target_raid_config is None:
            return
        task.upgrade_lock()
        # Re-check the state now that the lock is exclusive.
        if node.provision_state != states.CLEANWAIT:
            return
        # Avoid hitting clean_callback_timeout expiration
        node.touch_provisioning()

        raid_config = node.raid_config

        try:
            report = irmc_common.get_irmc_report(node)
        except (client.scci.SCCIInvalidInputError,
                client.scci.SCCIClientError):
            raid_config.update({'fgi_status': RAID_FAILED})
            raid_common.update_raid_info(node, raid_config)
            self._set_clean_failed(task, RAID_FAILED)
            return

        fgi_status_dict = _get_fgi_status(report, node_uuid)
        # Note(trungnv): Allow to check until RAID mechanism to be
        # completed with RAID information in report.
        if fgi_status_dict == RAID_COMPLETING:
            return
        if not fgi_status_dict:
            raid_config.update({'fgi_status': RAID_FAILED})
            raid_common.update_raid_info(node, raid_config)
            self._set_clean_failed(task, fgi_status_dict)
            return
        if all(fgi_status == 'Idle' for fgi_status in
               fgi_status_dict.values()):
            raid_config.update({'fgi_status': RAID_COMPLETED})
            raid_common.update_raid_info(node, raid_config)
            LOG.info('RAID configuration has completed on '
                     'node %(node)s with fgi_status is %(fgi)s',
                     {'node': node_uuid, 'fgi': RAID_COMPLETED})
            self._resume_cleaning(task)

    def _set_clean_failed(self, task, fgi_status_dict):
        """Log the failure, set the node's last_error and fail the task.

        :param task: a TaskManager instance containing the node to act on.
        :param fgi_status_dict: the FGI status to report in the error log.
        """
        LOG.error('RAID configuration task failed for node %(node)s. '
                  'with FGI status is: %(fgi)s. ',
                  {'node': task.node.uuid, 'fgi': fgi_status_dict})
        fgi_message = 'ServerViewRAID not available in Baremetal Server'
        task.node.last_error = fgi_message
        task.process_event('fail')

    def _resume_cleaning(self, task):
        """Ask the conductor to resume cleaning for the task's node."""
        manager_utils.notify_conductor_resume_clean(task)