Skip to content

Commit

Permalink
[Aggregated Metrics] Simplify get_metric_conditions() (Recidiviz/recidiviz-data#34796)
Browse files Browse the repository at this point in the history

## Description of the change

Now that there's only one observation selector per metric, we can
simplify the logic in all the `get_metric_conditions*` functions.

I checked and this resulted in no query changes to aggregated_metrics
views on main.

## Type of change

> All pull requests must have at least one of the following labels
applied (otherwise the PR will fail):

| Label | Description |
|-------|-------------|
| Type: Bug | non-breaking change that fixes an issue |
| Type: Feature | non-breaking change that adds functionality |
| Type: Breaking Change | fix or feature that would cause existing functionality to not work as expected |
| Type: Non-breaking refactor | change addresses some tech debt item or prepares for a later change, but does not change functionality |
| Type: Configuration Change | adjusts configuration to achieve some end related to functionality, development, performance, or security |
| Type: Dependency Upgrade | upgrades a project dependency - these changes are not included in release notes |

## Related issues

Part of Recidiviz/recidiviz-data#29291

## Checklists

### Development

**This box MUST be checked by the submitter prior to merging**:
- [x] **Double- and triple-checked that there is no Personally
Identifiable Information (PII) being mistakenly added in this pull
request**

These boxes should be checked by the submitter prior to merging:
- [ ] Tests have been written to cover the code changed/added as part of
this pull request

### Code review

These boxes should be checked by reviewers prior to merging:

- [x] This pull request has a descriptive title and information useful
to a reviewer
- [x] Potential security implications or infrastructural changes have
been considered, if relevant

GitOrigin-RevId: 87471cf8ed26e467ccec1da77ff557238653709a
  • Loading branch information
ageiduschek authored and Helper Bot committed Nov 26, 2024
1 parent fc74213 commit 43815fa
Show file tree
Hide file tree
Showing 4 changed files with 44 additions and 42 deletions.
2 changes: 1 addition & 1 deletion recidiviz/aggregated_metrics/aggregated_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def generate_aggregated_metrics_view_builder(
metrics_query_str = ",\n ".join([metric.name for metric in included_metrics])
view_description_metrics = [
f"|{metric.display_name} (`{metric.name}`)|{metric.description}|{metric.pretty_name()}|"
f"`{metric.get_metric_conditions_string_no_newline() if isinstance(metric, MetricConditionsMixin) else 'N/A'}`|"
f"`{metric.get_observation_conditions_string_no_newline() if isinstance(metric, MetricConditionsMixin) else 'N/A'}`|"
for metric in included_metrics
]
view_description_metrics_str = "\n".join(view_description_metrics)
Expand Down
80 changes: 41 additions & 39 deletions recidiviz/aggregated_metrics/models/aggregated_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,36 +86,38 @@ def unit_of_observation(self) -> MetricUnitOfObservation:
def unit_of_observation_type(self) -> MetricUnitOfObservationType:
return self.unit_of_observation.type

# TODO(#29291): Simplify this to return just a `str` since there's now only one
# fragment. Consider renaming get_metric_conditions* functions to
# get_observation_conditions*.
def get_metric_conditions(self) -> List[str]:
"""Returns a list of conditional query fragments filtering spans or events."""
def get_observation_conditions_string(self) -> str:
"""Returns a query fragment that filters a rows that contain observation data
based on configured observation conditions for this metric.
"""
fragment = self.observation_selector.generate_observation_conditions_query_fragment(
filter_by_observation_type=True,
# TODO(#29291): Figure out where get_metric_conditions() is being used and
# pass as a parameter through to get_metric_conditions() instead of always
# setting to True here so we can gate the new aggregated metric query code
# properly.
# TODO(#29291): Figure out where get_observation_conditions_string() is
# being used and pass as a parameter through to
# get_observation_conditions_string() instead of always setting to True
# here so we can gate the new aggregated metric query code properly.
read_attributes_from_json=True,
# TODO(#29291): Simplify to pass this arg through from the other
# get_metric_conditions* functions.
strip_newlines=False,
)
return [f"({fragment})"]

# TODO(#29291): Write tests for this
def get_metric_conditions_string(self) -> str:
"""Returns a query fragment string that joins SQL conditional statements with `AND`."""
return "\n\t\t\t\tOR\n".join(self.get_metric_conditions())
return f"({fragment})"

# TODO(#29291): Write tests for this
def get_metric_conditions_string_no_newline(self) -> str:
def get_observation_conditions_string_no_newline(self) -> str:
"""Returns a query fragment that filters a rows that contain observation data
based on configured observation conditions for this metric. All newlines are
stripped from the condition string so this can be used in places where we want
more succinct output.
"""
Returns a query fragment string that joins SQL conditional statements with `AND` without line breaks
or extra spaces, for more succinct print output.
"""
return re.sub(r" +|\n+", " ", " OR ".join(self.get_metric_conditions()))
fragment = self.observation_selector.generate_observation_conditions_query_fragment(
filter_by_observation_type=True,
# TODO(#29291): Figure out where
# get_observation_conditions_string_no_newline() is being used and pass as
# a parameter through to get_observation_conditions_string_no_newline()
# instead of always setting to True here so we can gate the new aggregated
# metric query code properly.
read_attributes_from_json=True,
strip_newlines=True,
)
return f"({fragment})"


@attr.define(frozen=True, kw_only=True, slots=False)
Expand Down Expand Up @@ -263,7 +265,7 @@ def generate_aggregation_query_fragment(
LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}),
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY)
) * (IF({self.get_metric_conditions_string()}, 1, 0)) / DATE_DIFF({period_end_date_col}, {period_start_date_col}, DAY)
) * (IF({self.get_observation_conditions_string()}, 1, 0)) / DATE_DIFF({period_end_date_col}, {period_start_date_col}, DAY)
) AS {self.name}
"""

Expand Down Expand Up @@ -303,7 +305,7 @@ def generate_aggregation_query_fragment(
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY
) * IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
CAST(JSON_EXTRACT_SCALAR(span_attributes, "$.{self.span_value_numeric}") AS FLOAT64),
0
)
Expand All @@ -313,7 +315,7 @@ def generate_aggregation_query_fragment(
LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}),
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY
) * IF({self.get_metric_conditions_string()}, 1, 0)
) * IF({self.get_observation_conditions_string()}, 1, 0)
)
) AS {self.name}
"""
Expand Down Expand Up @@ -354,7 +356,7 @@ def generate_aggregation_query_fragment(
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY
) * IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
(
# Average of LoS on last day (inclusive) of period/span and LoS on first day of period/span
(DATE_DIFF(
Expand All @@ -373,7 +375,7 @@ def generate_aggregation_query_fragment(
LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}),
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY
) * IF({self.get_metric_conditions_string()}, 1, 0)
) * IF({self.get_observation_conditions_string()}, 1, 0)
)
) AS {self.name}
"""
Expand Down Expand Up @@ -421,7 +423,7 @@ def generate_aggregation_query_fragment(
LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}),
GREATEST({period_start_date_col}, {span_start_date_col}),
DAY)
) * (IF({self.get_metric_conditions_string()}, 1, 0))
) * (IF({self.get_observation_conditions_string()}, 1, 0))
) AS {self.name}
"""

Expand All @@ -448,7 +450,7 @@ def generate_aggregation_query_fragment(
) -> str:
return f"""
COUNT(DISTINCT IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="ses")}),
NULL
)) AS {self.name}
Expand All @@ -473,7 +475,7 @@ def generate_aggregation_query_fragment(
) -> str:
return f"""
SUM(
IF({self.get_metric_conditions_string()}, DATE_DIFF(
IF({self.get_observation_conditions_string()}, DATE_DIFF(
LEAST(
DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY),
{nonnull_current_date_exclusive_clause(span_end_date_col)}
Expand Down Expand Up @@ -507,7 +509,7 @@ def generate_aggregation_query_fragment(
return f"""
MAX(
IF(
{self.get_metric_conditions_string()}
{self.get_observation_conditions_string()}
AND {span_start_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY),
DATE_DIFF(
LEAST(
Expand Down Expand Up @@ -546,7 +548,7 @@ def generate_aggregation_query_fragment(
return f"""
AVG(
IF(
{self.get_metric_conditions_string()}
{self.get_observation_conditions_string()}
AND {assignment_date_col} BETWEEN {span_start_date_col} AND {nonnull_current_date_exclusive_clause(span_end_date_col)},
CAST(JSON_EXTRACT_SCALAR(span_attributes, "$.{self.span_value_numeric}") AS FLOAT64),
NULL
Expand Down Expand Up @@ -608,7 +610,7 @@ def generate_aggregation_query_fragment(self, event_date_col: str) -> str:
)
return f"""
COUNT(DISTINCT IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
CONCAT(
{self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")},
{event_date_col}{event_segmentation_columns_str}
Expand Down Expand Up @@ -638,7 +640,7 @@ class EventValueMetric(PeriodEventAggregatedMetric):
def generate_aggregation_query_fragment(self, event_date_col: str) -> str:
return f"""
AVG(IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
CAST(JSON_EXTRACT_SCALAR(event_attributes, "$.{self.event_value_numeric}") AS FLOAT64),
NULL
)) AS {self.name}
Expand All @@ -660,7 +662,7 @@ class EventDistinctUnitCountMetric(PeriodEventAggregatedMetric):
def generate_aggregation_query_fragment(self, event_date_col: str) -> str:
return f"""
COUNT(DISTINCT IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")}),
NULL
)) AS {self.name}
Expand All @@ -687,7 +689,7 @@ def generate_aggregation_query_fragment(
MIN(DATE_DIFF(
IFNULL(
IF(
{self.get_metric_conditions_string()},
{self.get_observation_conditions_string()},
LEAST({event_date_col}, DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY)),
NULL
), DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY)),
Expand Down Expand Up @@ -715,7 +717,7 @@ def generate_aggregation_query_fragment(
return f"""
COUNT(
DISTINCT IF(
{self.get_metric_conditions_string()}
{self.get_observation_conditions_string()}
AND {event_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY),
CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")}, {event_date_col}),
NULL
Expand All @@ -741,7 +743,7 @@ def generate_aggregation_query_fragment(
) -> str:
return f"""
CAST(LOGICAL_OR(
{self.get_metric_conditions_string()}
{self.get_observation_conditions_string()}
AND {event_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY)
) AS INT64) AS {self.name}"""

Expand Down
2 changes: 1 addition & 1 deletion recidiviz/outliers/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def metric_event_conditions_string(self) -> str:
"""
The query fragment to use to filter analyst_data.person_events for this metric's events
"""
return self.aggregated_metric.get_metric_conditions_string_no_newline()
return self.aggregated_metric.get_observation_conditions_string_no_newline()


@attr.s
Expand Down
2 changes: 1 addition & 1 deletion recidiviz/tools/analyst/aggregated_metrics_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,7 +420,7 @@ def get_person_events(
LEFT JOIN `normalized_state.state_person_external_id` pei
ON e.person_id = pei.person_id
AND e.state_code = pei.state_code
WHERE ({' OR '.join(metric.get_metric_conditions())})
WHERE ({metric.get_observation_conditions_string()})
{officer_ids_filter}
AND e.event_date BETWEEN {min_date_str} AND {max_date_str}
Expand Down

0 comments on commit 43815fa

Please sign in to comment.