diff --git a/pontos/cpe/_cpe.py b/pontos/cpe/_cpe.py index 308e8d96c..1a567a927 100644 --- a/pontos/cpe/_cpe.py +++ b/pontos/cpe/_cpe.py @@ -52,23 +52,23 @@ def is_formatted_string_binding(cpe: str) -> bool: return cpe.startswith("cpe:2.3:") -def _remove_backslash(value: str) -> str: +def convert_double_backslash(value: str) -> str: """ - Remove a single backslash + Convert a double backslash into a single backslash """ return re.sub("\\\\(\\W)", lambda match: match.group(1), value) def _url_quote(value: str) -> str: """ - Quote value according to the pct_encode function from the spec + Quote value according to the pct_encode function from the spec for uri format """ return urllib.parse.quote(value, safe="").lower() def _url_unquote(value: str) -> str: """ - Un-quote value according to the the spec + Un-quote value according to the spec for uri format """ return urllib.parse.unquote(value) @@ -121,7 +121,7 @@ def unpack_edition(edition: str) -> dict[str, Optional[str]]: ) -def bind_value_for_fs(value: Optional[str]) -> str: +def bind_value_for_formatted_string(value: Optional[str]) -> str: """ Convert an attribute value for formatted string representation """ @@ -136,7 +136,8 @@ def bind_value_for_fs(value: Optional[str]) -> str: def _add_quoting(value: str) -> str: """ - Add quoting for parsing attributes from formatted string format + Add quoting for parsing attributes from formatted string format to + Well-Formed CPE Name Data Model (WFN) """ result = "" index = 0 @@ -144,19 +145,23 @@ def _add_quoting(value: str) -> str: while index < len(value): c = value[index] - if c.isalnum() or c in ["_"]: # not sure about "-" and "~" + if c.isalnum() or c in ["_"]: + # just add character result += c index += 1 embedded = True continue if c == "\\": + # keep escaped character result += value[index : index + 2] index += 2 embedded = True continue if c == ANY: + # An unquoted asterisk must appear at the beginning or + # end of the string. 
if index == 0 or index == (len(value) - 1): result += c index += 1 @@ -168,6 +173,8 @@ def _add_quoting(value: str) -> str: f"of '{value}'" ) if c == "?": + # An unquoted question mark must appear at the beginning or + # end of the string, or in a leading or trailing sequence if ( ( # ? is legal at the beginning or the end (index == 0) or (index == (len(value) - 1)) @@ -197,9 +204,9 @@ def _add_quoting(value: str) -> str: return result -def unbind_value_fs(value: Optional[str]) -> Optional[str]: +def unbind_value_from_formatted_string(value: Optional[str]) -> Optional[str]: """ - Convert a formatted string representation to an attribute value + Convert a formatted string representation to an attribute value for WFN """ if value is None or value == ANY or value == NA: return value @@ -230,7 +237,7 @@ def _transform_for_uri(value: str) -> str: if c == "\\": index += 1 next = value[index] - transformed += _url_quote(_remove_backslash(next)) + transformed += _url_quote(convert_double_backslash(next)) index += 1 continue @@ -321,6 +328,36 @@ def unbind_value_uri(value: Optional[str]) -> Optional[str]: return result +def unquote_attribute_value(value: Optional[str]) -> Optional[str]: + """ + Unquote a Well-Formed CPE Name Data Model (WFN) attribute value + """ + if not value or "\\" not in value: + # do nothing + return value + + index = 0 + result = "" + while index < len(value): + c = value[index] + if c == "\\": + next_c = value[index + 1] + if next_c in ["*", "?"]: + # keep escaped asterisks and question marks + result += f"{c}{next_c}" + else: + result += next_c + + index += 2 + continue + else: + result += c + + index += 1 + + return result + + def split_cpe(cpe: str) -> list[str]: """ Split a CPE into its parts @@ -348,7 +385,60 @@ def split_cpe(cpe: str) -> list[str]: return parts -@dataclass(frozen=True) # should require keywords only with Python >= 3.10 +@dataclass(frozen=True) +class CPEWellFormed: + """ + Represents a Common Platform Enumeration (CPE) name 
using the Well-Formed + CPE Name (WFN) Data Model. Attributes are quoted according to the WFN model. + + In most cases this class should not be used directly and the CPE class + should be used instead. + + Attributes: + part: Value should be "a" for application, "o" for operating system or + "h" for hardware + vendor: Person or organization that manufactured or created the product + product: Identifies the most common and recognizable title or name of + the product + version: A vendor-specific alphanumeric string characterizing the + particular release version of the product + update: A vendor-specific alphanumeric string characterizing the + particular update, service pack, or point release of the product + edition: The edition attribute is considered deprecated in the 2.3 + CPE specification, and it should be assigned the logical value ANY + except where required for backward compatibility with version 2.2 of + the CPE specification. This attribute is referred to as the “legacy + edition” attribute + language: Defines the language supported in the user interface of the + product (as language tags defined by RFC5646) + sw_edition: Characterizes how the product is tailored to a particular + market or class of end users. Extended attribute introduced with + version 2.3 of the CPE specification + target_sw: Characterizes the software computing environment within which + the product operates. Extended attribute introduced with + version 2.3 of the CPE specification + target_hw: Characterizes the instruction set architecture (e.g., x86) + on which the product operates. Extended attribute introduced with + version 2.3 of the CPE specification + other: Captures any other general descriptive or identifying information + which is vendor- or product-specific and which does not logically + fit in any other attribute value. 
Extended attribute introduced with + version 2.3 of the CPE specification + """ + + part: Part + vendor: str + product: str + version: Optional[str] = None + update: Optional[str] = None + edition: Optional[str] = None + language: Optional[str] = None + sw_edition: Optional[str] = None + target_sw: Optional[str] = None + target_hw: Optional[str] = None + other: Optional[str] = None + + class CPE: """ Represents a Common Platform Enumeration (CPE) name @@ -396,22 +486,51 @@ class CPE: print(cpe.vendor) # google print(cpe.product) # android - print(cpe.version) # 13\.0 + print(cpe.version) # 13.0 print(cpe.as_uri_binding()) # cpe:/o:google:android:13.0 """ - part: Part - vendor: str - product: str - version: Optional[str] = None - update: Optional[str] = None - edition: Optional[str] = None - language: Optional[str] = None - sw_edition: Optional[str] = None - target_sw: Optional[str] = None - target_hw: Optional[str] = None - other: Optional[str] = None - cpe_string: Optional[str] = None + def __init__( + self, + *, + cpe_string: Optional[str] = None, + part: Part, + vendor: str, + product: str, + version: Optional[str] = None, + update: Optional[str] = None, + edition: Optional[str] = None, + language: Optional[str] = None, + sw_edition: Optional[str] = None, + target_sw: Optional[str] = None, + target_hw: Optional[str] = None, + other: Optional[str] = None, + ) -> None: + self.cpe_string = cpe_string + self.__wnf__ = CPEWellFormed( + part=part, + vendor=vendor, + product=product, + version=version, + update=update, + edition=edition, + language=language, + sw_edition=sw_edition, + target_sw=target_sw, + target_hw=target_hw, + other=other, + ) + self.part = part + self.vendor = unquote_attribute_value(vendor) + self.product = unquote_attribute_value(product) + self.version = unquote_attribute_value(version) + self.update = unquote_attribute_value(update) + self.edition = unquote_attribute_value(edition) + self.language = unquote_attribute_value(language) + 
self.sw_edition = unquote_attribute_value(sw_edition) + self.target_sw = unquote_attribute_value(target_sw) + self.target_hw = unquote_attribute_value(target_hw) + self.other = unquote_attribute_value(other) @staticmethod def from_string(cpe: str) -> "CPE": @@ -472,7 +591,7 @@ def from_string(cpe: str) -> "CPE": "target_hw", "other", ], - [unbind_value_fs(a) for a in parts[3:]], + [unbind_value_from_formatted_string(a) for a in parts[3:]], ) ) @@ -512,16 +631,16 @@ def as_uri_binding(self) -> str: Converts the CPE to an URI binding """ part = self.part.value - vendor = bind_value_for_uri(self.vendor) - product = bind_value_for_uri(self.product) - version = bind_value_for_uri(self.version) - update = bind_value_for_uri(self.update) - language = bind_value_for_uri(self.language) - edition = bind_value_for_uri(self.edition) - sw_edition = bind_value_for_uri(self.sw_edition) - target_sw = bind_value_for_uri(self.target_sw) - target_hw = bind_value_for_uri(self.target_hw) - other = bind_value_for_uri(self.other) + vendor = bind_value_for_uri(self.__wnf__.vendor) + product = bind_value_for_uri(self.__wnf__.product) + version = bind_value_for_uri(self.__wnf__.version) + update = bind_value_for_uri(self.__wnf__.update) + language = bind_value_for_uri(self.__wnf__.language) + edition = bind_value_for_uri(self.__wnf__.edition) + sw_edition = bind_value_for_uri(self.__wnf__.sw_edition) + target_sw = bind_value_for_uri(self.__wnf__.target_sw) + target_hw = bind_value_for_uri(self.__wnf__.target_hw) + other = bind_value_for_uri(self.__wnf__.other) edition = pack_extended_attributes( edition, @@ -547,16 +666,16 @@ def as_formatted_string_binding(self) -> str: Converts the CPE to a formatted string binding """ part = self.part.value - vendor = bind_value_for_fs(self.vendor) - product = bind_value_for_fs(self.product) - version = bind_value_for_fs(self.version) - update = bind_value_for_fs(self.update) - edition = bind_value_for_fs(self.edition) - language = 
bind_value_for_fs(self.language) - sw_edition = bind_value_for_fs(self.sw_edition) - target_sw = bind_value_for_fs(self.target_sw) - target_hw = bind_value_for_fs(self.target_hw) - other = bind_value_for_fs(self.other) + vendor = bind_value_for_formatted_string(self.__wnf__.vendor) + product = bind_value_for_formatted_string(self.__wnf__.product) + version = bind_value_for_formatted_string(self.__wnf__.version) + update = bind_value_for_formatted_string(self.__wnf__.update) + edition = bind_value_for_formatted_string(self.__wnf__.edition) + language = bind_value_for_formatted_string(self.__wnf__.language) + sw_edition = bind_value_for_formatted_string(self.__wnf__.sw_edition) + target_sw = bind_value_for_formatted_string(self.__wnf__.target_sw) + target_hw = bind_value_for_formatted_string(self.__wnf__.target_hw) + other = bind_value_for_formatted_string(self.__wnf__.other) return ( f"cpe:2.3:{part}:{vendor}:{product}:{version}:{update}:" f"{edition}:{language}:{sw_edition}:{target_sw}:{target_hw}:{other}" @@ -580,17 +699,17 @@ def clone( all_android_versions = cpe.clone(version=ANY) """ args = { - "part": self.part, - "vendor": self.vendor, - "product": self.product, - "version": self.version, - "update": self.update, - "edition": self.edition, - "language": self.language, - "sw_edition": self.sw_edition, - "target_sw": self.target_sw, - "target_hw": self.target_hw, - "other": self.other, + "part": self.__wnf__.part, + "vendor": self.__wnf__.vendor, + "product": self.__wnf__.product, + "version": self.__wnf__.version, + "update": self.__wnf__.update, + "edition": self.__wnf__.edition, + "language": self.__wnf__.language, + "sw_edition": self.__wnf__.sw_edition, + "target_sw": self.__wnf__.target_sw, + "target_hw": self.__wnf__.target_hw, + "other": self.__wnf__.other, "cpe_string": self.cpe_string, } args.update(**kwargs) diff --git a/tests/cpe/test_cpe.py b/tests/cpe/test_cpe.py index c60ad0906..aae9d9d65 100644 --- a/tests/cpe/test_cpe.py +++ 
b/tests/cpe/test_cpe.py @@ -7,7 +7,13 @@ import unittest from pontos.cpe import ANY, CPE, NA, CPEParsingError, Part -from pontos.cpe._cpe import split_cpe +from pontos.cpe._cpe import ( + bind_value_for_formatted_string, + convert_double_backslash, + split_cpe, + unbind_value_from_formatted_string, + unquote_attribute_value, +) class SplitCpeTestCase(unittest.TestCase): @@ -61,6 +67,123 @@ def test_split_formatted_cpe(self): self.assertEqual(parts[12], "*") +class ConvertDoubleBackslashTestCase(unittest.TestCase): + def test_remove_backslash(self): + self.assertEqual(convert_double_backslash("foo-bar"), "foo-bar") + self.assertEqual(convert_double_backslash("foo\\bar"), "foo\\bar") + self.assertEqual(convert_double_backslash("foo\\\\bar"), "foo\\bar") + + +class UnbindValueFromFormattedStringTestCase(unittest.TestCase): + def test_unchanged(self): + self.assertIsNone(unbind_value_from_formatted_string(None)) + self.assertEqual(unbind_value_from_formatted_string(ANY), ANY) + self.assertEqual(unbind_value_from_formatted_string(NA), NA) + + self.assertEqual( + unbind_value_from_formatted_string("foo_bar"), "foo_bar" + ) + self.assertEqual( + unbind_value_from_formatted_string("foo\\:bar"), "foo\\:bar" + ) + self.assertEqual( + unbind_value_from_formatted_string("1\\.2\\.3"), "1\\.2\\.3" + ) + + def test_quoting(self): + self.assertEqual( + unbind_value_from_formatted_string("foo-bar"), "foo\\-bar" + ) + self.assertEqual( # not sure if this can happen and if it's valid + unbind_value_from_formatted_string("foo:bar"), "foo\\:bar" + ) + self.assertEqual( + unbind_value_from_formatted_string("1.2.3"), "1\\.2\\.3" + ) + + def test_asterisk(self): + self.assertEqual(unbind_value_from_formatted_string("*foo"), "*foo") + self.assertEqual(unbind_value_from_formatted_string("foo*"), "foo*") + self.assertEqual(unbind_value_from_formatted_string("foo\\*"), "foo\\*") + + with self.assertRaisesRegex( + CPEParsingError, + "An unquoted asterisk must appear at the beginning or end of 
" + "'foo\*bar'", + ): + unbind_value_from_formatted_string("foo*bar") + + with self.assertRaisesRegex( + CPEParsingError, + "An unquoted asterisk must appear at the beginning or end of " + "'\*\*foo'", + ): + unbind_value_from_formatted_string("**foo") + + def test_question_mark(self): + self.assertEqual(unbind_value_from_formatted_string("?foo"), "?foo") + self.assertEqual(unbind_value_from_formatted_string("??foo"), "??foo") + self.assertEqual(unbind_value_from_formatted_string("foo?"), "foo?") + self.assertEqual(unbind_value_from_formatted_string("foo??"), "foo??") + self.assertEqual(unbind_value_from_formatted_string("foo\\?"), "foo\\?") + + with self.assertRaisesRegex( + CPEParsingError, + "An unquoted question mark must appear at the beginning or end, " + "or in a leading or trailing sequence 'foo\?bar'", + ): + unbind_value_from_formatted_string("foo?bar") + + +class BindValueForFormattedStringTestCase(unittest.TestCase): + def test_any(self): + self.assertEqual(bind_value_for_formatted_string(None), ANY) + self.assertEqual(bind_value_for_formatted_string(""), ANY) + self.assertEqual(bind_value_for_formatted_string(ANY), ANY) + + def test_na(self): + self.assertEqual(bind_value_for_formatted_string(NA), NA) + + def test_remove_quoting(self): + self.assertEqual(bind_value_for_formatted_string("1\\.2\\.3"), "1.2.3") + # _ doesn't get quoted during unbinding therefore unquoting it here + # doesn't really make sense bit it's in the standard! 
+ self.assertEqual( + bind_value_for_formatted_string("foo\\_bar"), "foo_bar" + ) + self.assertEqual( + bind_value_for_formatted_string("foo\\-bar"), "foo-bar" + ) + + def test_unchanged(self): + self.assertEqual( + bind_value_for_formatted_string("foo\\:bar"), "foo\\:bar" + ) + self.assertEqual(bind_value_for_formatted_string("?foo"), "?foo") + self.assertEqual(bind_value_for_formatted_string("foo*"), "foo*") + self.assertEqual(bind_value_for_formatted_string("foo\\*"), "foo\\*") + + +class UnquoteAttributeValueTestCase(unittest.TestCase): + def test_unchanged(self): + self.assertIsNone(unquote_attribute_value(None)) + self.assertEqual(unquote_attribute_value(""), "") + self.assertEqual(unquote_attribute_value(ANY), ANY) + self.assertEqual(unquote_attribute_value("?"), "?") + self.assertEqual(unquote_attribute_value("foo-bar"), "foo-bar") + self.assertEqual(unquote_attribute_value("foo_bar"), "foo_bar") + self.assertEqual(unquote_attribute_value("1.2.3"), "1.2.3") + + def test_special(self): + self.assertEqual(unquote_attribute_value("foo\\?bar"), "foo\\?bar") + self.assertEqual(unquote_attribute_value("foo\\*bar"), "foo\\*bar") + + def test_unquote(self): + self.assertEqual(unquote_attribute_value("foo\\\\bar"), "foo\\bar") + self.assertEqual(unquote_attribute_value("foo\\:bar"), "foo:bar") + self.assertEqual(unquote_attribute_value("1\\.2\\.3"), "1.2.3") + + class CPETestCase(unittest.TestCase): def test_uri_binding(self): cpe_string = "cpe:/o:microsoft:windows_xp:::pro" @@ -91,7 +214,6 @@ def test_uri_binding(self): cpe = CPE.from_string( "cpe:/a:foo%5cbar:big%24money_manager_2010:::~~special~ipod_touch~80gb~" ) - print(repr(cpe)) self.assertEqual( str(cpe), "cpe:/a:foo%5cbar:big%24money_manager_2010:::~~special~ipod_touch~80gb~", @@ -107,8 +229,8 @@ def test_uri_binding(self): self.assertTrue(cpe.is_uri_binding()) self.assertFalse(cpe.is_formatted_string_binding()) self.assertEqual(cpe.part, Part.APPLICATION) - self.assertEqual(cpe.vendor, "foo\\\\bar") - 
self.assertEqual(cpe.product, "big\$money_manager_2010") + self.assertEqual(cpe.vendor, "foo\\bar") + self.assertEqual(cpe.product, "big$money_manager_2010") self.assertEqual(cpe.version, ANY) self.assertEqual(cpe.update, ANY) self.assertIsNone(cpe.language) @@ -236,7 +358,7 @@ def test_uri_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "microsoft") self.assertEqual(cpe.product, "internet_explorer") - self.assertEqual(cpe.version, "8\.0\.6001") + self.assertEqual(cpe.version, "8.0.6001") self.assertEqual(cpe.update, "beta") self.assertIsNone(cpe.language) self.assertIsNone(cpe.edition) @@ -252,8 +374,8 @@ def test_uri_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "microsoft") self.assertEqual(cpe.product, "internet_explorer") - self.assertEqual(cpe.version, "8\.\*") - self.assertEqual(cpe.update, "sp\?") + self.assertEqual(cpe.version, "8.\\*") + self.assertEqual(cpe.update, "sp\\?") self.assertIsNone(cpe.language) self.assertIsNone(cpe.edition) self.assertIsNone(cpe.sw_edition) @@ -268,7 +390,7 @@ def test_uri_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "microsoft") self.assertEqual(cpe.product, "internet_explorer") - self.assertEqual(cpe.version, "8\.*") + self.assertEqual(cpe.version, "8.*") self.assertEqual(cpe.update, "sp?") self.assertIsNone(cpe.language) self.assertIsNone(cpe.edition) @@ -287,7 +409,7 @@ def test_uri_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "hp") self.assertEqual(cpe.product, "insight_diagnostics") - self.assertEqual(cpe.version, "7\.4\.0\.1570") + self.assertEqual(cpe.version, "7.4.0.1570") self.assertEqual(cpe.update, ANY) self.assertIsNone(cpe.language) self.assertIsNone(cpe.edition) @@ -305,7 +427,7 @@ def test_uri_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "hp") 
self.assertEqual(cpe.product, "openview_network_manager") - self.assertEqual(cpe.version, "7\.51") + self.assertEqual(cpe.version, "7.51") self.assertEqual(cpe.update, NA) self.assertIsNone(cpe.language) self.assertIsNone(cpe.edition) @@ -325,8 +447,8 @@ def test_uri_unbind_examples(self): self.assertTrue(cpe.is_uri_binding()) self.assertFalse(cpe.is_formatted_string_binding()) self.assertEqual(cpe.part, Part.APPLICATION) - self.assertEqual(cpe.vendor, "foo\~bar") - self.assertEqual(cpe.product, "big\~money_2010") + self.assertEqual(cpe.vendor, "foo~bar") + self.assertEqual(cpe.product, "big~money_2010") self.assertIsNone(cpe.version) self.assertIsNone(cpe.update) self.assertIsNone(cpe.language) @@ -442,7 +564,7 @@ def test_formatted_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "microsoft") self.assertEqual(cpe.product, "internet_explorer") - self.assertEqual(cpe.version, "8\.0\.6001") + self.assertEqual(cpe.version, "8.0.6001") self.assertEqual(cpe.update, "beta") self.assertEqual(cpe.edition, ANY) self.assertEqual(cpe.language, ANY) @@ -460,7 +582,7 @@ def test_formatted_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "microsoft") self.assertEqual(cpe.product, "internet_explorer") - self.assertEqual(cpe.version, "8\.*") + self.assertEqual(cpe.version, "8.*") self.assertEqual(cpe.update, "sp?") self.assertEqual(cpe.language, ANY) self.assertEqual(cpe.edition, ANY) @@ -478,7 +600,7 @@ def test_formatted_unbind_examples(self): self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "hp") self.assertEqual(cpe.product, "insight_diagnostics") - self.assertEqual(cpe.version, "7\.4\.0\.1570") + self.assertEqual(cpe.version, "7.4.0.1570") self.assertEqual(cpe.update, NA) self.assertEqual(cpe.language, ANY) self.assertEqual(cpe.edition, ANY) @@ -504,8 +626,8 @@ def test_formatted_unbind_examples(self): self.assertFalse(cpe.is_uri_binding()) 
self.assertTrue(cpe.is_formatted_string_binding()) self.assertEqual(cpe.part, Part.APPLICATION) - self.assertEqual(cpe.vendor, "foo\\\\bar") - self.assertEqual(cpe.product, "big\$money") + self.assertEqual(cpe.vendor, "foo\\bar") + self.assertEqual(cpe.product, "big$money") self.assertEqual(cpe.version, "2010") self.assertEqual(cpe.update, ANY) self.assertEqual(cpe.edition, ANY) @@ -520,8 +642,8 @@ def test_formatted_unbind_examples(self): self.assertTrue(cpe.is_formatted_string_binding()) self.assertEqual(cpe.part, Part.APPLICATION) self.assertEqual(cpe.vendor, "foo") - self.assertEqual(cpe.product, "bar\:mumble") - self.assertEqual(cpe.version, "1\.0") + self.assertEqual(cpe.product, "bar:mumble") + self.assertEqual(cpe.version, "1.0") self.assertEqual(cpe.update, ANY) self.assertEqual(cpe.edition, ANY) self.assertEqual(cpe.language, ANY) @@ -605,5 +727,5 @@ def test_clone(self): ) cpe2 = cpe.clone(version=ANY) self.assertIsNot(cpe, cpe2) - self.assertEqual(cpe.version, "7\\.51") + self.assertEqual(cpe.version, "7.51") self.assertEqual(cpe2.version, ANY)