From 81858b30ec2bba18d487007ea5fb470b9aee997c Mon Sep 17 00:00:00 2001 From: John Kastner <130772734+john-h-kastner-aws@users.noreply.github.com> Date: Mon, 25 Nov 2024 16:33:27 -0500 Subject: [PATCH] Don't warn on graphical ascii and spaces in idents (#1336) Signed-off-by: John Kastner --- cedar-policy-validator/src/diagnostics.rs | 2 + .../src/diagnostics/validation_warnings.rs | 8 +- cedar-policy-validator/src/str_checks.rs | 87 +++++++++++++++++-- cedar-policy/CHANGELOG.md | 5 ++ cedar-policy/src/api/err.rs | 17 ++-- 5 files changed, 104 insertions(+), 15 deletions(-) diff --git a/cedar-policy-validator/src/diagnostics.rs b/cedar-policy-validator/src/diagnostics.rs index f79ae2eb9..4ee0fd200 100644 --- a/cedar-policy-validator/src/diagnostics.rs +++ b/cedar-policy-validator/src/diagnostics.rs @@ -487,11 +487,13 @@ impl ValidationWarning { source_loc: Option, policy_id: PolicyID, id: impl Into, + confusable_character: char, ) -> Self { validation_warnings::ConfusableIdentifier { source_loc, policy_id, id: id.into(), + confusable_character, } .into() } diff --git a/cedar-policy-validator/src/diagnostics/validation_warnings.rs b/cedar-policy-validator/src/diagnostics/validation_warnings.rs index f8a2e8c60..27a1b320f 100644 --- a/cedar-policy-validator/src/diagnostics/validation_warnings.rs +++ b/cedar-policy-validator/src/diagnostics/validation_warnings.rs @@ -100,7 +100,11 @@ impl Diagnostic for MixedScriptIdentifier { /// Warning for identifiers containing confusable characters #[derive(Debug, Clone, PartialEq, Error, Eq, Hash)] -#[error("for policy `{policy_id}`, identifier `{id}` contains characters that fall outside of the General Security Profile for Identifiers")] +#[error( + "for policy `{policy_id}`, identifier `{}` contains the character `{}` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers", + .id.escape_debug(), + .confusable_character.escape_debug() +)] pub struct ConfusableIdentifier { /// Source location pub source_loc: Option, @@ -108,6 +112,8 @@ pub struct ConfusableIdentifier { pub policy_id: PolicyID, /// Identifier containing confusable characters pub id: String, + /// The specific character we're not happy about + pub confusable_character: char, } impl Diagnostic for ConfusableIdentifier { diff --git a/cedar-policy-validator/src/str_checks.rs b/cedar-policy-validator/src/str_checks.rs index 203a151a6..4b2c13bf7 100644 --- a/cedar-policy-validator/src/str_checks.rs +++ b/cedar-policy-validator/src/str_checks.rs @@ -78,11 +78,15 @@ fn permissable_ident( policy_id.clone(), s, )) - } else if !s.chars().all(|c| c.identifier_allowed()) { + } else if let Some(c) = s + .chars() + .find(|c| *c != ' ' && !c.is_ascii_graphic() && !c.identifier_allowed()) + { Some(ValidationWarning::confusable_identifier( loc.cloned(), policy_id.clone(), s, + c, )) } else if !s.is_single_script() { Some(ValidationWarning::mixed_script_identifier( @@ -119,7 +123,9 @@ mod test { use cedar_policy_core::{ ast::PolicySet, parser::{parse_policy, Loc}, + test_utils::{expect_err, ExpectedErrorMessageBuilder}, }; + use cool_asserts::assert_matches; use std::sync::Arc; #[test] fn strs() { @@ -148,14 +154,77 @@ mod test { permissable_ident(None, &PolicyID::from_string("0"), "test"), None ); - match permissable_ident(None, &PolicyID::from_string("0"), "is​Admin") { - Some(ValidationWarning::ConfusableIdentifier(_)) => (), - o => panic!("should have produced ConfusableIdentifier: {:?}", o), - }; - match permissable_ident(None, &PolicyID::from_string("0"), "say_һello") { - Some(ValidationWarning::MixedScriptIdentifier(_)) => (), - o => panic!("should have produced MixedScriptIdentifier: {:?}", o), - }; + assert_eq!( + permissable_ident( + None, + &PolicyID::from_string("0"), + "https://www.example.com/test?foo=bar&bar=baz#buz" + ), + None + ); + assert_eq!( + permissable_ident( + None, + &PolicyID::from_string("0"), + "http://example.com/query{firstName}-{lastName}" + ), + None + ); + assert_eq!( + permissable_ident( + None, + &PolicyID::from_string("0"), + "example_user+1@example.com" + ), + None + ); + assert_eq!( + permissable_ident(None, &PolicyID::from_string("0"), "get /pets/{petId}"), + None + ); + + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "is​Admin"), Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `is\u{200b}Admin` contains the character `\u{200b}` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers"#) + .build()); + }); + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "new\nline"), Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `new\nline` contains the character `\n` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers"#) + .build()); + }); + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "null\0"), Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `null\0` contains the character `\0` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers"#) + .build()); + }); + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "delete\x7f"), Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `delete\u{7f}` contains the character `\u{7f}` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers"#) + .build()); + }); + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "🍌"), Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `🍌` contains the character `🍌` which is not a printable ASCII character and falls outside of the General Security Profile for Identifiers"#) + .build()); + }); + assert_matches!(permissable_ident(None, &PolicyID::from_string("0"), "say_һello") , Some(warning) => { + expect_err( + "", + &miette::Report::new(warning), + &ExpectedErrorMessageBuilder::error(r#"for policy `0`, identifier `say_һello` contains mixed scripts"#) + .build()); + }); } #[test] diff --git a/cedar-policy/CHANGELOG.md b/cedar-policy/CHANGELOG.md index 43b2757a4..018c6cba6 100644 --- a/cedar-policy/CHANGELOG.md +++ b/cedar-policy/CHANGELOG.md @@ -19,6 +19,11 @@ Cedar Language Version: TBD - Added protobuf and JSON generation code to `cedar-policy-cli`. - Added a new get helper method to Context that allows easy extraction of generic values from the context by key. This method simplifies the common use case of retrieving values from Context objects. +### Changed + +- Stopped emitting warnings for identifiers containing certain printable ASCII + characters (e.g., `/` and `:`) (#1336, resolving #621) + ## [4.2.2] - 2024-11-11 Cedar Language version: 4.1 diff --git a/cedar-policy/src/api/err.rs b/cedar-policy/src/api/err.rs index d81dac72b..db66fd4e8 100644 --- a/cedar-policy/src/api/err.rs +++ b/cedar-policy/src/api/err.rs @@ -522,23 +522,30 @@ pub mod validation_warnings; #[derive(Debug, Clone, Error, Diagnostic)] #[non_exhaustive] pub enum ValidationWarning { - /// A string contains mixed scripts. Different scripts can contain visually similar characters which may be confused for each other. + /// A string contains a mix of characters for different scripts (e.g., latin + /// and cyrillic alphabets). Different scripts can contain visually similar + /// characters which may be confused for each other. #[diagnostic(transparent)] #[error(transparent)] MixedScriptString(#[from] validation_warnings::MixedScriptString), - /// A string contains BIDI control characters. These can be used to create crafted pieces of code that obfuscate true control flow. + /// A string contains bidirectional text control characters. These can be used to create crafted pieces of code that obfuscate true control flow. #[diagnostic(transparent)] #[error(transparent)] BidiCharsInString(#[from] validation_warnings::BidiCharsInString), - /// An id contains BIDI control characters. These can be used to create crafted pieces of code that obfuscate true control flow. + /// An id contains bidirectional text control characters. These can be used to create crafted pieces of code that obfuscate true control flow. #[diagnostic(transparent)] #[error(transparent)] BidiCharsInIdentifier(#[from] validation_warnings::BidiCharsInIdentifier), - /// An id contains mixed scripts. This can cause characters to be confused for each other. + /// An id contains a mix of characters for different scripts (e.g., latin and + /// cyrillic alphabets). Different scripts can contain visually similar + /// characters which may be confused for each other. #[diagnostic(transparent)] #[error(transparent)] MixedScriptIdentifier(#[from] validation_warnings::MixedScriptIdentifier), - /// An id contains characters that fall outside of the General Security Profile for Identifiers. We recommend adhering to this if possible. See Unicode® Technical Standard #39 for more info. + /// An id contains characters that is not a [graphical ASCII character](https://doc.rust-lang.org/std/primitive.char.html#method.is_ascii_graphic), + /// not the space character (`U+0020`), and falls outside of the General + /// Security Profile for Identifiers. We recommend adhering to this if + /// possible. See [Unicode® Technical Standard #39](https://unicode.org/reports/tr39/#General_Security_Profile) for more information. #[diagnostic(transparent)] #[error(transparent)] ConfusableIdentifier(#[from] validation_warnings::ConfusableIdentifier),