Skip to content

Commit

Permalink
Generate actions HashMap at ValidatorSchema instantiation (#1290)
Browse files Browse the repository at this point in the history
Signed-off-by: Tom Paulus <[email protected]>
  • Loading branch information
tpaulus authored Oct 25, 2024
1 parent 0be2b46 commit 44998fe
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 48 deletions.
25 changes: 7 additions & 18 deletions cedar-policy-validator/src/coreschema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/

use crate::{ValidatorEntityType, ValidatorSchema};
use cedar_policy_core::extensions::{ExtensionFunctionLookupError, Extensions};
use cedar_policy_core::{ast, entities};
use miette::Diagnostic;
use smol_str::SmolStr;
use std::collections::{HashMap, HashSet};
use std::collections::hash_map::Values;
use std::collections::HashSet;
use std::iter::Cloned;
use std::sync::Arc;
use thiserror::Error;

Expand All @@ -28,37 +29,25 @@ use thiserror::Error;
pub struct CoreSchema<'a> {
/// Contains all the information
schema: &'a ValidatorSchema,
/// For easy lookup, this is a map from action name to `Entity` object
/// for each action in the schema. This information is contained in the
/// `ValidatorSchema`, but not efficient to extract -- getting the `Entity`
/// from the `ValidatorSchema` is O(N) as of this writing, but with this
/// cache it's O(1).
actions: HashMap<ast::EntityUID, Arc<ast::Entity>>,
}

impl<'a> CoreSchema<'a> {
/// Create a new `CoreSchema` for the given `ValidatorSchema`
pub fn new(schema: &'a ValidatorSchema) -> Self {
Self {
actions: schema
.action_entities_iter()
.map(|e| (e.uid().clone(), Arc::new(e)))
.collect(),
schema,
}
Self { schema }
}
}

impl<'a> entities::Schema for CoreSchema<'a> {
type EntityTypeDescription = EntityTypeDescription;
type ActionEntityIterator = Vec<Arc<ast::Entity>>;
type ActionEntityIterator = Cloned<Values<'a, ast::EntityUID, Arc<ast::Entity>>>;

fn entity_type(&self, entity_type: &ast::EntityType) -> Option<EntityTypeDescription> {
EntityTypeDescription::new(self.schema, entity_type)
}

fn action(&self, action: &ast::EntityUID) -> Option<Arc<ast::Entity>> {
self.actions.get(action).cloned()
self.schema.actions.get(action).cloned()
}

fn entity_types_with_basename<'b>(
Expand All @@ -79,7 +68,7 @@ impl<'a> entities::Schema for CoreSchema<'a> {
}

fn action_entities(&self) -> Self::ActionEntityIterator {
self.actions.values().map(Arc::clone).collect()
self.schema.actions.values().cloned()
}
}

Expand Down
79 changes: 49 additions & 30 deletions cedar-policy-validator/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,6 @@
//! `member_of` relation from the schema is reversed and the transitive closure is
//! computed to obtain a `descendants` relation.
use std::collections::{hash_map::Entry, BTreeMap, BTreeSet, HashMap, HashSet};
use std::str::FromStr;

use cedar_policy_core::{
ast::{Entity, EntityType, EntityUID, InternalName, Name, UnreservedId},
entities::{err::EntitiesError, Entities, TCComputation},
Expand All @@ -34,6 +31,9 @@ use nonempty::NonEmpty;
use serde::{Deserialize, Serialize};
use serde_with::serde_as;
use smol_str::ToSmolStr;
use std::collections::{hash_map::Entry, BTreeMap, BTreeSet, HashMap, HashSet};
use std::str::FromStr;
use std::sync::Arc;

use crate::{
cedar_schema::SchemaWarning,
Expand Down Expand Up @@ -132,8 +132,8 @@ impl ValidatorSchemaFragment<ConditionalName, ConditionalName> {
))
}

/// Convert this [`ValidatorSchemaFragment<ConditionalName>`] into a
/// [`ValidatorSchemaFragment<Name>`] by fully-qualifying all typenames that
/// Convert this [`ValidatorSchemaFragment<ConditionalName, A>`] into a
/// [`ValidatorSchemaFragment<Name, A>`] by fully-qualifying all typenames that
/// appear anywhere in any definitions.
///
/// `all_defs` needs to contain the full set of all fully-qualified typenames
Expand Down Expand Up @@ -170,6 +170,14 @@ pub struct ValidatorSchema {
/// Map from action id names to the [`ValidatorActionId`] object.
#[serde_as(as = "Vec<(_, _)>")]
action_ids: HashMap<EntityUID, ValidatorActionId>,

/// For easy lookup, this is a map from action name to `Entity` object
/// for each action in the schema. This information is contained in the
/// `ValidatorSchema`, but not efficient to extract -- getting the `Entity`
/// from the `ValidatorSchema` is O(N) as of this writing, but with this
/// cache it's O(1).
#[serde_as(as = "Vec<(_, _)>")]
pub(crate) actions: HashMap<EntityUID, Arc<Entity>>,
}

/// Construct [`ValidatorSchema`] from a string containing a schema formatted
Expand Down Expand Up @@ -288,6 +296,7 @@ impl ValidatorSchema {
Self {
entity_types: HashMap::new(),
action_ids: HashMap::new(),
actions: HashMap::new(),
}
}

Expand Down Expand Up @@ -590,9 +599,14 @@ impl ValidatorSchema {
common_types.into_values(),
)?;

let actions = Self::action_entities_iter(&action_ids)
.map(|e| (e.uid().clone(), Arc::new(e)))
.collect();

Ok(ValidatorSchema {
entity_types,
action_ids,
actions,
})
}

Expand Down Expand Up @@ -813,23 +827,23 @@ impl ValidatorSchema {
/// Invert the action hierarchy to get the ancestor relation expected for
/// the `Entity` datatype instead of descendants as stored by the schema.
pub(crate) fn action_entities_iter(
&self,
action_ids: &HashMap<EntityUID, ValidatorActionId>,
) -> impl Iterator<Item = cedar_policy_core::ast::Entity> + '_ {
// We could store the un-inverted `memberOf` relation for each action,
// but I [john-h-kastner-aws] judge that the current implementation is
// actually less error prone, as it minimizes the threading of data
// structures through some complicated bits of schema construction code,
// and avoids computing the TC twice.
let mut action_ancestors: HashMap<&EntityUID, HashSet<EntityUID>> = HashMap::new();
for (action_euid, action_def) in &self.action_ids {
for (action_euid, action_def) in action_ids {
for descendant in &action_def.descendants {
action_ancestors
.entry(descendant)
.or_default()
.insert(action_euid.clone());
}
}
self.action_ids.iter().map(move |(action_id, action)| {
action_ids.iter().map(move |(action_id, action)| {
Entity::new_with_attr_partial_value_serialized_as_expr(
action_id.clone(),
action.attributes.clone(),
Expand All @@ -842,7 +856,7 @@ impl ValidatorSchema {
pub fn action_entities(&self) -> std::result::Result<Entities, EntitiesError> {
let extensions = Extensions::all_available();
Entities::from_entities(
self.action_entities_iter(),
self.actions.values().map(|entity| entity.as_ref().clone()),
None::<&cedar_policy_core::entities::NoEntitiesSchema>, // we don't want to tell `Entities::from_entities()` to add the schema's action entities, that would infinitely recurse
TCComputation::AssumeAlreadyComputed,
extensions,
Expand Down Expand Up @@ -880,6 +894,28 @@ impl From<&proto::ValidatorSchema> for ValidatorSchema {
// PANIC SAFETY: experimental feature
#[allow(clippy::expect_used)]
fn from(v: &proto::ValidatorSchema) -> Self {
let action_ids = v
.action_ids
.iter()
.map(|kvp| {
let k = ast::EntityUID::from(
kvp.key
.as_ref()
.expect("`as_ref()` for field that should exist"),
);
let v = ValidatorActionId::from(
kvp.value
.as_ref()
.expect("`as_ref()` for field that should exist"),
);
(k, v)
})
.collect();

let actions = Self::action_entities_iter(&action_ids)
.map(|e| (e.uid().clone(), Arc::new(e)))
.collect();

Self {
entity_types: v
.entity_types
Expand All @@ -898,23 +934,8 @@ impl From<&proto::ValidatorSchema> for ValidatorSchema {
(k, v)
})
.collect(),
action_ids: v
.action_ids
.iter()
.map(|kvp| {
let k = ast::EntityUID::from(
kvp.key
.as_ref()
.expect("`as_ref()` for field that should exist"),
);
let v = ValidatorActionId::from(
kvp.value
.as_ref()
.expect("`as_ref()` for field that should exist"),
);
(k, v)
})
.collect(),
action_ids,
actions,
}
}
}
Expand Down Expand Up @@ -2665,8 +2686,7 @@ pub(crate) mod test {
let schema_fragment =
json_schema::Fragment::from_json_value(src).expect("Failed to parse schema");
let schema: ValidatorSchema = schema_fragment.try_into().expect("Schema should construct");
let view_photo = schema
.action_entities_iter()
let view_photo = ValidatorSchema::action_entities_iter(&schema.action_ids)
.find(|e| e.uid() == &r#"ExampleCo::Personnel::Action::"viewPhoto""#.parse().unwrap())
.unwrap();
let ancestors = view_photo.ancestors().collect::<Vec<_>>();
Expand Down Expand Up @@ -2726,8 +2746,7 @@ pub(crate) mod test {
let schema_fragment =
json_schema::Fragment::from_json_value(src).expect("Failed to parse schema");
let schema: ValidatorSchema = schema_fragment.try_into().unwrap();
let view_photo = schema
.action_entities_iter()
let view_photo = ValidatorSchema::action_entities_iter(&schema.action_ids)
.find(|e| e.uid() == &r#"ExampleCo::Personnel::Action::"viewPhoto""#.parse().unwrap())
.unwrap();
let ancestors = view_photo.ancestors().collect::<Vec<_>>();
Expand Down
2 changes: 2 additions & 0 deletions cedar-policy/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Cedar Language Version: TBD
includes a suggestion based on available extension functions (#1280, resolving #332).
- The error associated with parsing a non-existent extension method additionally
includes a suggestion based on available extension methods (#1289, resolving #246).
- Extract action graph inversion from `CoreSchema` to `ValidatorSchema` instantiation
to improve schema validation speeds. (#1290, as part of resolving #1285)

### Fixed

Expand Down

0 comments on commit 44998fe

Please sign in to comment.