From 4dccf78d69bbd6bab5b175d795ce506020a7963d Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Mon, 20 Feb 2017 16:34:31 -0800 Subject: [PATCH] Add a compose ID parser, tweak short and version rules This adds a `parse_compose_id` function to `composeinfo`. It can parse most compose IDs back into their component parts. As part of this, we tweak the rules for short names and versions a little. The `RELEASE_SHORT_RE` regex now allows upper-case characters, as Fedora has been using these in short names for some time (only `create_release_id` actually validated short names, up until now, which is why we hadn't noticed this inconsistency). The `RELEASE_VERSION_RE` regex no longer accepts an arbitrary string that doesn't start with a digit as a version; the only non-numeric version allowed is 'Rawhide' (this is the only one that exists in the wild so far as I know). This is to make it possible to write a sane parser at all. We also fix `get_date_type_respin` to work with the new `ci` type. We could possibly go further than this and disallow the use of dashes in short names; if we did that, parsing compose IDs would become much less difficult and slightly more reliable. But it would require us to either rename or 'grandfather in' Fedora's existing short names with dashes in them, which are Fedora-Atomic, Fedora-Docker and Fedora-Cloud. It would be reasonably easy to write a parser and regex which special-case those exact names but disallow any other with a dash. 
Signed-off-by: Adam Williamson --- productmd/common.py | 8 +- productmd/composeinfo.py | 144 +++++++++++++++++++++++++++- tests/test_common.py | 11 ++- tests/test_composeinfo.py | 196 +++++++++++++++++++++++++++++++++++++- 4 files changed, 348 insertions(+), 11 deletions(-) diff --git a/productmd/common.py b/productmd/common.py index 238a6e9..dc11ebd 100644 --- a/productmd/common.py +++ b/productmd/common.py @@ -93,12 +93,12 @@ def parse_nvra(nvra): return result -#: Validation regex for release short name: [a-z] followed by [a-z0-9] separated with dashes. -RELEASE_SHORT_RE = re.compile(r"^[a-z]+([a-z0-9]*-?[a-z0-9]+)*$") +#: Validation regex for release short name: [a-zA-Z] followed by [a-zA-Z0-9] separated with dashes. +RELEASE_SHORT_RE = re.compile(r"^[a-zA-Z]+([a-zA-Z0-9]*-?[a-zA-Z0-9]+)*$") -#: Validation regex for release version: any string or [0-9] separated with dots. -RELEASE_VERSION_RE = re.compile(r"^([^0-9].*|([0-9]+(\.?[0-9]+)*))$") +#: Validation regex for release version: 'Rawhide' or [0-9] separated with dots. +RELEASE_VERSION_RE = re.compile(r"^(Rawhide|[0-9]+(\.?[0-9]+)*)$") #: Supported release types. 
diff --git a/productmd/composeinfo.py b/productmd/composeinfo.py index e82007f..ba77d00 100644 --- a/productmd/composeinfo.py +++ b/productmd/composeinfo.py @@ -195,7 +195,7 @@ def verify_label(label): def get_date_type_respin(compose_id): - pattern = re.compile(r".*(?P<date>\d{8})(?P<type>\.nightly|\.n|\.test|\.t)?(\.(?P<respin>\d+))?.*") + pattern = re.compile(r".*(?P<date>\d{8})(?P<type>\.nightly|\.n|\.test|\.t|\.ci)?(\.(?P<respin>\d+))?.*") match = pattern.match(compose_id) if not match: return None, None, None @@ -208,11 +208,153 @@ def get_date_type_respin(compose_id): result["type"] = "nightly" elif result["type"] in (".test", ".t"): result["type"] = "test" + elif result["type"] == ".ci": + result["type"] = "ci" else: raise ValueError("Unknown compose type: %s" % result["type"]) return (result["date"], result["type"], int(result["respin"])) +def parse_compose_id(compose_id): + """Parse a compose ID back into its component values. Returns a + dict containing values for 'short', 'version', 'version_type', + 'bp_short', 'bp_version', 'bp_type', 'variant', 'date', + 'compose_type' and 'respin'. If the ID is not for a layered + compose, the 'bp_*' values will be ''. The 'variant' value will + only be populated (as 'Client' or 'Server') for RHEL 5 composes + (see `ComposeInfo.create_compose_id`). 'date', 'compose_type' and + 'respin' are the output of `get_date_type_respin`. May + raise ValueError for pathologically unparseable compose IDs. Can + be fooled by very weird shortnames: for instance, if you were to + use 'f-26-r' as a shortname, resulting in a compose ID like + 'f-26-r-23-20170225.n.0', then this parser will read that as a + layered product compose ID, where bp_short is 'r', bp_version is + '23', short is 'f' and version is '26'. 
+ """ + # init values + short = '' + version = '' + version_type = '' + bp_short = '' + bp_version = '' + bp_type = '' + variant = '' + + # find date, type, respin + (date, compose_type, respin) = get_date_type_respin(compose_id) + # now split on the date, we only care about what comes before + part = compose_id.rsplit(date, 1)[0][:-1] + + # Handle "HACK: there are 2 RHEL 5 composes" + if part.endswith("-Client"): + variant = "Client" + part = part[:-len('-Client')] + elif part.endswith("-Server"): + variant = "Server" + part = part[:-len('-Server')] + + # Next part back must be either a version type suffix or a version + # we don't know yet if this is the main version or the base + # version for a layered product + (part, somever) = part.rsplit('-', 1) + # See if it's a type_suffix + if somever.lower() in productmd.common.RELEASE_TYPES: + sometype = somever + (part, somever) = part.rsplit('-', 1) + else: + sometype = '' + + # what remains is either: + # short + # or: + # short-version(-version_type)-bp_short + # But this is where things get fun, because sadly, both short + # and bp_short could have - in them and version could have a type + # suffix. So, life is fun. Let's see if we can spot what looks + # like a '-version(-type)' component. Note that particularly evil + # shortnames can screw us up here: see the comment where we check + # the length of `goodmatches`. + elems = part.split('-') + # Only do this magic if we have at least 3 elems, because if we + # have 1 or 2, we know it's just the shortname. 
+ if len(elems) > 2: + # use this to track all of the RELEASE_VERSION_RE matches we + # find + matches = [] + for (idx, cand) in enumerate(elems): + # can't be the first or the last + if idx == 0 or idx == len(elems) - 1: + continue + # now see if the cand looks like a version + match = RELEASE_VERSION_RE.match(cand) + if match: + matchver = match.group(1) + # check if the next element looks like a version type + nextel = elems[idx+1] + if nextel.lower() in productmd.common.RELEASE_TYPES: + # if we got *two* matches that look like + # -version-version_type- , we're pretty screwed + matchtype = nextel + else: + matchtype = '' + matches.append((matchver, matchtype, idx)) + + # find all matches that produce two valid short names + goodmatches = [] + for match in matches: + (_version, _version_type, idx) = match + _short = '-'.join(elems[:idx]) + if _version_type: + _bp_short = '-'.join(elems[idx+2:]) + else: + _bp_short = '-'.join(elems[idx+1:]) + if all(productmd.common.is_valid_release_short(shrt) for shrt in (_short, _bp_short)): + goodmatches.append(match) + + if len(goodmatches) > 1: + # we're boned. you have to work quite hard to get here, + # though. this will do it: 'F-26-F-26-RHEL-6-20170225.n.0' + # where the shortname and base shortname could be either + # 'F' and 'F-26-RHEL' or 'F-26-F' and 'RHEL'. 
+ raise ValueError("Cannot parse compose ID %s as it contains more than one possible " + "-version(-version_type)- string" % compose_id) + + if goodmatches: + (version, version_type, idx) = goodmatches[0] + bp_version = somever + bp_type = sometype + short = '-'.join(elems[:idx]) + if version_type: + bp_short = '-'.join(elems[idx+2:]) + else: + bp_short = '-'.join(elems[idx+1:]) + + # if we didn't establish a version above, we must not be layered, + # and what remains is just short, and somever is version + if not short: + short = part + version = somever + version_type = sometype + + if not version_type: + version_type = 'ga' + if bp_version and not bp_type: + bp_type = 'ga' + + return { + 'short': short, + 'version': version, + 'version_type': version_type, + 'bp_short': bp_short, + 'bp_version': bp_version, + 'bp_type': bp_type, + 'variant': variant, + 'date': date, + 'compose_type': compose_type, + 'respin': respin + } + + def cmp_label(label1, label2): name1, ver1 = label1.rsplit("-", 1) name2, ver2 = label2.rsplit("-", 1) diff --git a/tests/test_common.py b/tests/test_common.py index 0b3e044..c2bb014 100755 --- a/tests/test_common.py +++ b/tests/test_common.py @@ -36,10 +36,10 @@ class TestRelease(unittest.TestCase): def test_valid_short(self): self.assertTrue(is_valid_release_short("f")) - self.assertFalse(is_valid_release_short("F")) + self.assertTrue(is_valid_release_short("F")) self.assertTrue(is_valid_release_short("fedora")) - self.assertFalse(is_valid_release_short("Fedora")) + self.assertTrue(is_valid_release_short("Fedora")) self.assertTrue(is_valid_release_short("fedora-server")) self.assertTrue(is_valid_release_short("fedora-server-23")) @@ -60,7 +60,10 @@ def test_valid_version(self): self.assertTrue(is_valid_release_version("1.0")) self.assertTrue(is_valid_release_version("1.1")) - self.assertTrue(is_valid_release_version("a")) + self.assertTrue(is_valid_release_version("Rawhide")) + self.assertFalse(is_valid_release_version("rawhide")) + + 
self.assertFalse(is_valid_release_version("a")) self.assertFalse(is_valid_release_version("1.a")) self.assertFalse(is_valid_release_version("1.1a")) @@ -71,8 +74,6 @@ def test_valid_version(self): self.assertFalse(is_valid_release_version("1.1.")) self.assertFalse(is_valid_release_version("1..1")) - self.assertTrue(is_valid_release_version("rawhide")) - def test_split_version(self): self.assertEqual(split_version("0"), [0]) self.assertEqual(split_version("1.0"), [1, 0]) diff --git a/tests/test_composeinfo.py b/tests/test_composeinfo.py index 2180e52..d41dde2 100755 --- a/tests/test_composeinfo.py +++ b/tests/test_composeinfo.py @@ -29,7 +29,7 @@ DIR = os.path.dirname(__file__) sys.path.insert(0, os.path.join(DIR, "..")) -from productmd.composeinfo import ComposeInfo, Variant, Release # noqa +from productmd.composeinfo import ComposeInfo, Variant, Release, parse_compose_id # noqa class TestComposeInfo(unittest.TestCase): @@ -194,6 +194,200 @@ def test_get_variants(self): self.assertEqual(ci.get_variants(), [variant]) self.assertEqual(ci.get_variants(arch='x86_64'), [variant]) + def test_parse_compose_id(self): + assert parse_compose_id('F-22-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('F-22-20160622.ci.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'ci', + 'respin': 0 + } + assert parse_compose_id('F-22-20160622.t.1') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 
'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'test', + 'respin': 1 + } + assert parse_compose_id('F-22-updates-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-updates-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('F-22-BASE-3-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'ga', + 'variant': '', + 'date': '20160622', + 'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-BASE-3-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'ga', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('F-22-updates-BASE-3-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'ga', + 'variant': '', + 'date': '20160622', + 'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-updates-BASE-3-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'ga', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('F-22-BASE-3-updates-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'updates', + 'variant': '', + 'date': '20160622', + 
'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-BASE-3-updates-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'ga', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'updates', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('F-22-updates-BASE-3-updates-20160622.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'updates', + 'variant': '', + 'date': '20160622', + 'compose_type': 'production', + 'respin': 0 + } + assert parse_compose_id('F-22-updates-BASE-3-updates-20160622.n.0') == { + 'short': 'F', + 'version': '22', + 'version_type': 'updates', + 'bp_short': 'BASE', + 'bp_version': '3', + 'bp_type': 'updates', + 'variant': '', + 'date': '20160622', + 'compose_type': 'nightly', + 'respin': 0 + } + assert parse_compose_id('Fedora-Rawhide-updates-RHEL-6.3.4-20160513.t.1') == { + 'short': 'Fedora', + 'version': 'Rawhide', + 'version_type': 'updates', + 'bp_short': 'RHEL', + 'bp_version': '6.3.4', + 'bp_type': 'ga', + 'variant': '', + 'date': '20160513', + 'compose_type': 'test', + 'respin': 1 + } + assert parse_compose_id('rhel-5-updates-Server-20160523.2') == { + 'short': 'rhel', + 'version': '5', + 'version_type': 'updates', + 'bp_short': '', + 'bp_version': '', + 'bp_type': '', + 'variant': 'Server', + 'date': '20160523', + 'compose_type': 'production', + 'respin': 2 + } + class TestCreateComposeID(unittest.TestCase): def setUpRelease(self, compose_type, release_type, bp_type=None):