From 732203e97448fc4cf1ec2d644fa062e3ea75d556 Mon Sep 17 00:00:00 2001 From: mhecko Date: Tue, 2 Apr 2024 19:29:16 +0200 Subject: [PATCH 1/2] boot: check first partition offset on GRUB devices Check that the first partition starts at least at 1MiB (2048 cylinders), as too small first-partition offsets lead to failures when doing grub2-install. The limit (1MiB) has been chosen as it is a common value set by the disk formatting tools nowadays. jira: https://issues.redhat.com/browse/RHEL-3341 --- .../actors/checkfirstpartitionoffset/actor.py | 24 ++++++ .../libraries/check_first_partition_offset.py | 52 +++++++++++++ .../test_check_first_partition_offset.py | 51 ++++++++++++ .../scangrubdevpartitionlayout/actor.py | 18 +++++ .../libraries/scan_layout.py | 64 +++++++++++++++ .../tests/test_scan_partition_layout.py | 78 +++++++++++++++++++ .../el7toel8/models/partitionlayout.py | 28 +++++++ 7 files changed, 315 insertions(+) create mode 100644 repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/actor.py create mode 100644 repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py create mode 100644 repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py create mode 100644 repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/actor.py create mode 100644 repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py create mode 100644 repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py create mode 100644 repos/system_upgrade/el7toel8/models/partitionlayout.py diff --git a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/actor.py b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/actor.py new file mode 100644 index 0000000000..cde27c2ad2 --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/actor.py @@ -0,0 +1,24 @@ +from leapp.actors import Actor +from leapp.libraries.actor import check_first_partition_offset +from leapp.models import FirmwareFacts, GRUBDevicePartitionLayout +from leapp.reporting import Report +from leapp.tags import ChecksPhaseTag, IPUWorkflowTag + + +class CheckFirstPartitionOffset(Actor): + """ + Check whether the first partition starts at the offset >=1MiB. + + The alignment of the first partition plays role in disk access speeds. Older tools placed the start of the first + partition at cylinder 63 (due to historical reasons connected to the INT13h BIOS API). However, grub core + binary is placed before the start of the first partition, meaning that not enough space causes bootloader + installation to fail. Modern partitioning tools place the first partition at >= 1MiB (cylinder 2048+). + """ + + name = 'check_first_partition_offset' + consumes = (FirmwareFacts, GRUBDevicePartitionLayout,) + produces = (Report,) + tags = (ChecksPhaseTag, IPUWorkflowTag,) + + def process(self): + check_first_partition_offset.check_first_partition_offset() diff --git a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py new file mode 100644 index 0000000000..fbd4e1788a --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py @@ -0,0 +1,52 @@ +from leapp import reporting +from leapp.libraries.common.config import architecture +from leapp.libraries.stdlib import api +from leapp.models import FirmwareFacts, GRUBDevicePartitionLayout + +SAFE_OFFSET_BYTES = 1024*1024 # 1MiB + + +def check_first_partition_offset(): + if architecture.matches_architecture(architecture.ARCH_S390X): + return + + for fact in api.consume(FirmwareFacts): + if fact.firmware == 'efi': + return # Skip EFI system + + problematic_devices = [] + for grub_dev in api.consume(GRUBDevicePartitionLayout): + first_partition = min(grub_dev.partitions, key=lambda partition: partition.start_offset) + if first_partition.start_offset < SAFE_OFFSET_BYTES: + problematic_devices.append(grub_dev.device) + + if problematic_devices: + summary = ( + 'On the system booting by using BIOS, the in-place upgrade fails ' + 'when upgrading the GRUB2 bootloader if the boot disk\'s embedding area ' + 'does not contain enough space for the core image installation. ' + 'This results in a broken system, and can occur when the disk has been ' + 'partitioned manually, for example using the RHEL 6 fdisk utility.\n\n' + + 'The list of devices with small embedding area:\n' + '{0}.' + ) + problematic_devices_fmt = ['- {0}'.format(dev) for dev in problematic_devices] + + hint = ( + 'We recommend to perform a fresh installation of the RHEL 8 system ' + 'instead of performing the in-place upgrade.\n' + 'Another possibility is to reformat the devices so that there is ' + 'at least {0} kiB space before the first partition. ' + 'Note that this operation is not supported and does not have to be ' + 'always possible.' + ) + + reporting.create_report([ + reporting.Title('Found GRUB devices with too little space reserved before the first partition'), + reporting.Summary(summary.format('\n'.join(problematic_devices_fmt))), + reporting.Remediation(hint=hint.format(SAFE_OFFSET_BYTES // 1024)), + reporting.Severity(reporting.Severity.HIGH), + reporting.Groups([reporting.Groups.BOOT]), + reporting.Groups([reporting.Groups.INHIBITOR]), + ]) diff --git a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py new file mode 100644 index 0000000000..e349ff7d73 --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py @@ -0,0 +1,51 @@ +import pytest + +from leapp import reporting +from leapp.libraries.actor import check_first_partition_offset +from leapp.libraries.common import grub +from leapp.libraries.common.testutils import create_report_mocked, CurrentActorMocked +from leapp.libraries.stdlib import api +from leapp.models import FirmwareFacts, GRUBDevicePartitionLayout, PartitionInfo +from leapp.reporting import Report +from leapp.utils.report import is_inhibitor + + +@pytest.mark.parametrize( + ('devices', 'should_report'), + [ + ( + [ + GRUBDevicePartitionLayout(device='/dev/vda', + partitions=[PartitionInfo(part_device='/dev/vda1', start_offset=32256)]) + ], + True + ), + ( + [ + GRUBDevicePartitionLayout(device='/dev/vda', + partitions=[PartitionInfo(part_device='/dev/vda1', start_offset=1024*1025)]) + ], + False + ), + ( + [ + GRUBDevicePartitionLayout(device='/dev/vda', + partitions=[PartitionInfo(part_device='/dev/vda1', start_offset=1024*1024)]) + ], + False + ) + ] +) +def test_bad_offset_reported(monkeypatch, devices, should_report): + def consume_mocked(model_cls): + if model_cls == FirmwareFacts: + return [FirmwareFacts(firmware='bios')] + return devices + + monkeypatch.setattr(api, 'consume', consume_mocked) + monkeypatch.setattr(api, 'current_actor', CurrentActorMocked()) + monkeypatch.setattr(reporting, 'create_report', create_report_mocked()) + + check_first_partition_offset.check_first_partition_offset() + + assert bool(reporting.create_report.called) == should_report diff --git a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/actor.py b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/actor.py new file mode 100644 index 0000000000..0db93aba75 --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/actor.py @@ -0,0 +1,18 @@ +from leapp.actors import Actor +from leapp.libraries.actor import scan_layout as scan_layout_lib +from leapp.models import GRUBDevicePartitionLayout, GrubInfo +from leapp.tags import FactsPhaseTag, IPUWorkflowTag + + +class ScanGRUBDevicePartitionLayout(Actor): + """ + Scan all identified GRUB devices for their partition layout. + """ + + name = 'scan_grub_device_partition_layout' + consumes = (GrubInfo,) + produces = (GRUBDevicePartitionLayout,) + tags = (FactsPhaseTag, IPUWorkflowTag,) + + def process(self): + scan_layout_lib.scan_grub_device_partition_layout() diff --git a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py new file mode 100644 index 0000000000..bb2e6d9e0d --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py @@ -0,0 +1,64 @@ +from leapp.libraries.stdlib import api, CalledProcessError, run +from leapp.models import GRUBDevicePartitionLayout, GrubInfo, PartitionInfo + +SAFE_OFFSET_BYTES = 1024*1024 # 1MiB + + +def split_on_space_segments(line): + fragments = (fragment.strip() for fragment in line.split(' ')) + return [fragment for fragment in fragments if fragment] + + +def get_partition_layout(device): + try: + partition_table = run(['fdisk', '-l', '-u=sectors', device], split=True)['stdout'] + except CalledProcessError as err: + # Unlikely - if the disk has no partition table, `fdisk` terminates with 0 (no err). Fdisk exits with an err + # when the device does not exists, or if it is too small to contain a partition table. + + err_msg = 'Failed to run `fdisk` to obtain the partition table of the device {0}. Full error: \'{1}\'' + api.current_logger().error(err_msg.format(device, str(err))) + return None + + table_iter = iter(partition_table) + + for line in table_iter: + if not line.startswith('Units'): + # We are still reading general device information and not the table itself + continue + + unit = line.split('=')[2].strip() # Contains '512 bytes' + unit = int(unit.split(' ')[0].strip()) + break # First line of the partition table header + + for line in table_iter: + line = line.strip() + if not line.startswith('Device'): + continue + + part_all_attrs = split_on_space_segments(line) + break + + partitions = [] + for partition_line in table_iter: + # Fields: Device Boot Start End Sectors Size Id Type + # The line looks like: `/dev/vda1 * 2048 2099199 2097152 1G 83 Linux` + part_info = split_on_space_segments(partition_line) + + # If the partition is not bootable, the Boot column might be empty + part_device = part_info[0] + part_start = int(part_info[2]) if len(part_info) == len(part_all_attrs) else int(part_info[1]) + partitions.append(PartitionInfo(part_device=part_device, start_offset=part_start*unit)) + + return GRUBDevicePartitionLayout(device=device, partitions=partitions) + + +def scan_grub_device_partition_layout(): + grub_devices = next(api.consume(GrubInfo), None) + if not grub_devices: + return + + for device in grub_devices.orig_devices: + dev_info = get_partition_layout(device) + if dev_info: + api.produce(dev_info) diff --git a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py new file mode 100644 index 0000000000..37bb5bcf4e --- /dev/null +++ b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py @@ -0,0 +1,78 @@ +from collections import namedtuple + +import pytest + +from leapp.libraries.actor import scan_layout as scan_layout_lib +from leapp.libraries.common import grub +from leapp.libraries.common.testutils import create_report_mocked, produce_mocked +from leapp.libraries.stdlib import api +from leapp.models import GRUBDevicePartitionLayout, GrubInfo +from leapp.utils.report import is_inhibitor + +Device = namedtuple('Device', ['name', 'partitions', 'sector_size']) +Partition = namedtuple('Partition', ['name', 'start_offset']) + + +@pytest.mark.parametrize( + 'devices', + [ + ( + Device(name='/dev/vda', sector_size=512, + partitions=[Partition(name='/dev/vda1', start_offset=63), + Partition(name='/dev/vda2', start_offset=1000)]), + Device(name='/dev/vdb', sector_size=1024, + partitions=[Partition(name='/dev/vdb1', start_offset=100), + Partition(name='/dev/vdb2', start_offset=20000)]) + ), + ( + Device(name='/dev/vda', sector_size=512, + partitions=[Partition(name='/dev/vda1', start_offset=111), + Partition(name='/dev/vda2', start_offset=1000)]), + ) + ] +) +def test_get_partition_layout(monkeypatch, devices): + device_to_fdisk_output = {} + for device in devices: + fdisk_output = [ + 'Disk {0}: 42.9 GB, 42949672960 bytes, 83886080 sectors'.format(device.name), + 'Units = sectors of 1 * {sector_size} = {sector_size} bytes'.format(sector_size=device.sector_size), + 'Sector size (logical/physical): 512 bytes / 512 bytes', + 'I/O size (minimum/optimal): 512 bytes / 512 bytes', + 'Disk label type: dos', + 'Disk identifier: 0x0000000da', + '', + ' Device Boot Start End Blocks Id System', + ] + for part in device.partitions: + part_line = '{0} * {1} 2099199 1048576 83 Linux'.format(part.name, part.start_offset) + fdisk_output.append(part_line) + + device_to_fdisk_output[device.name] = fdisk_output + + def mocked_run(cmd, *args, **kwargs): + assert cmd[:3] == ['fdisk', '-l', '-u=sectors'] + device = cmd[3] + output = device_to_fdisk_output[device] + return {'stdout': output} + + def consume_mocked(*args, **kwargs): + yield GrubInfo(orig_devices=[device.name for device in devices]) + + monkeypatch.setattr(scan_layout_lib, 'run', mocked_run) + monkeypatch.setattr(api, 'produce', produce_mocked()) + monkeypatch.setattr(api, 'consume', consume_mocked) + + scan_layout_lib.scan_grub_device_partition_layout() + + assert api.produce.called == len(devices) + + dev_name_to_desc = {dev.name: dev for dev in devices} + + for message in api.produce.model_instances: + assert isinstance(message, GRUBDevicePartitionLayout) + dev = dev_name_to_desc[message.device] + + expected_part_name_to_start = {part.name: part.start_offset*dev.sector_size for part in dev.partitions} + actual_part_name_to_start = {part.part_device: part.start_offset for part in message.partitions} + assert expected_part_name_to_start == actual_part_name_to_start diff --git a/repos/system_upgrade/el7toel8/models/partitionlayout.py b/repos/system_upgrade/el7toel8/models/partitionlayout.py new file mode 100644 index 0000000000..c648328347 --- /dev/null +++ b/repos/system_upgrade/el7toel8/models/partitionlayout.py @@ -0,0 +1,28 @@ +from leapp.models import fields, Model +from leapp.topics import SystemInfoTopic + + +class PartitionInfo(Model): + """ + Information about a single partition. + """ + topic = SystemInfoTopic + + part_device = fields.String() + """ Partition device """ + + start_offset = fields.Integer() + """ Partition start - offset from the start of the block device in bytes """ + + +class GRUBDevicePartitionLayout(Model): + """ + Information about partition layout of a GRUB device. + """ + topic = SystemInfoTopic + + device = fields.String() + """ GRUB device """ + + partitions = fields.List(fields.Model(PartitionInfo)) + """ List of partitions present on the device """ From 17d180728925fdf606006a89d7428d8570d21153 Mon Sep 17 00:00:00 2001 From: Petr Stodulka Date: Wed, 24 Apr 2024 01:09:51 +0200 Subject: [PATCH 2/2] boot: Skip checks of first partition offset for for gpt partition table This is extension of the previous commit. The original problem that we are trying to resolve is to be sure the embedding area (MBR gap) has expected size. This is irrelevant in case of GPT partition table is used on a device. The fdisk output format is in case of GPT disk label different, which breaks the parsing, resulting in empty list of partitions in related GRUBDevicePartitionLayout msg. For now, let's skip produce of msgs for "GPT devices". As a seatbelt, ignore processing of messages with empty partitions field, expecting that such a device does not contain MBR. We want to prevent false positive inhibitors (and FP blocking errors). We expect that total number of machines with small embedding area is very minor in total numbers, so even if we would miss something (which is not expected now to our best knowledge) it's still good trade-off as the major goal is to reduce number of machines that have problems with the in-place upgrade. The solution can be updated in future if there is a reason for it. --- .../libraries/check_first_partition_offset.py | 7 +++++ .../test_check_first_partition_offset.py | 16 +++++++++++ .../libraries/scan_layout.py | 28 +++++++++++++++++++ .../tests/test_scan_partition_layout.py | 5 ++++ 4 files changed, 56 insertions(+) diff --git a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py index fbd4e1788a..fca9c3ff4c 100644 --- a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py +++ b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/libraries/check_first_partition_offset.py @@ -16,6 +16,13 @@ def check_first_partition_offset(): problematic_devices = [] for grub_dev in api.consume(GRUBDevicePartitionLayout): + if not grub_dev.partitions: + # NOTE(pstodulk): In case of empty partition list we have nothing to do. + # This can could happen when the fdisk output is different then expected. + # E.g. when GPT partition table is used on the disk. We are right now + # interested strictly about MBR only, so ignoring these cases. + # This is seatbelt, as the msg should not be produced for GPT at all. + continue first_partition = min(grub_dev.partitions, key=lambda partition: partition.start_offset) if first_partition.start_offset < SAFE_OFFSET_BYTES: problematic_devices.append(grub_dev.device) diff --git a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py index e349ff7d73..f925f7d4ce 100644 --- a/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py +++ b/repos/system_upgrade/el7toel8/actors/checkfirstpartitionoffset/tests/test_check_first_partition_offset.py @@ -20,6 +20,16 @@ ], True ), + ( + [ + GRUBDevicePartitionLayout(device='/dev/vda', + partitions=[ + PartitionInfo(part_device='/dev/vda2', start_offset=1024*1025), + PartitionInfo(part_device='/dev/vda1', start_offset=32256) + ]) + ], + True + ), ( [ GRUBDevicePartitionLayout(device='/dev/vda', @@ -33,6 +43,12 @@ partitions=[PartitionInfo(part_device='/dev/vda1', start_offset=1024*1024)]) ], False + ), + ( + [ + GRUBDevicePartitionLayout(device='/dev/vda', partitions=[]) + ], + False ) ] ) diff --git a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py index bb2e6d9e0d..f51bcda429 100644 --- a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py +++ b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/libraries/scan_layout.py @@ -31,6 +31,34 @@ def get_partition_layout(device): unit = int(unit.split(' ')[0].strip()) break # First line of the partition table header + # Discover disk label type: dos | gpt + for line in table_iter: + line = line.strip() + if not line.startswith('Disk label type'): + continue + disk_type = line.split(':')[1].strip() + break + + if disk_type == 'gpt': + api.current_logger().info( + 'Detected GPT partition table. Skipping produce of GRUBDevicePartitionLayout message.' + ) + # NOTE(pstodulk): The GPT table has a different output format than + # expected below, example (ignore start/end lines): + # --------------------------- start ---------------------------------- + # # Start End Size Type Name + # 1 2048 4095 1M BIOS boot + # 2 4096 2101247 1G Microsoft basic + # 3 2101248 41940991 19G Linux LVM + # ---------------------------- end ----------------------------------- + # But mainly, in case of GPT, we have nothing to actually check as + # we are gathering this data now mainly to get information about the + # actual size of embedding area (MBR gap). In case of GPT, there is + # bios boot / prep boot partition, which has always 1 MiB and fulfill + # our expectations. So skip in this case another processing and generation + # of the msg. Let's improve it in future if we find a reason for it. + return None + for line in table_iter: line = line.strip() if not line.startswith('Device'): diff --git a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py index 37bb5bcf4e..5402537980 100644 --- a/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py +++ b/repos/system_upgrade/el7toel8/actors/scangrubdevpartitionlayout/tests/test_scan_partition_layout.py @@ -76,3 +76,8 @@ def consume_mocked(*args, **kwargs): expected_part_name_to_start = {part.name: part.start_offset*dev.sector_size for part in dev.partitions} actual_part_name_to_start = {part.part_device: part.start_offset for part in message.partitions} assert expected_part_name_to_start == actual_part_name_to_start + + +def test_get_partition_layout_gpt(monkeypatch): + # TODO(pstodulk): skipping for now, due to time pressure. Testing for now manually. + pass