diff --git a/recidiviz/aggregated_metrics/aggregated_metrics.py b/recidiviz/aggregated_metrics/aggregated_metrics.py index 9ab92fbc41..61299a37db 100644 --- a/recidiviz/aggregated_metrics/aggregated_metrics.py +++ b/recidiviz/aggregated_metrics/aggregated_metrics.py @@ -89,7 +89,7 @@ def generate_aggregated_metrics_view_builder( metrics_query_str = ",\n ".join([metric.name for metric in included_metrics]) view_description_metrics = [ f"|{metric.display_name} (`{metric.name}`)|{metric.description}|{metric.pretty_name()}|" - f"`{metric.get_metric_conditions_string_no_newline() if isinstance(metric, MetricConditionsMixin) else 'N/A'}`|" + f"`{metric.get_observation_conditions_string_no_newline() if isinstance(metric, MetricConditionsMixin) else 'N/A'}`|" for metric in included_metrics ] view_description_metrics_str = "\n".join(view_description_metrics) diff --git a/recidiviz/aggregated_metrics/models/aggregated_metric.py b/recidiviz/aggregated_metrics/models/aggregated_metric.py index 67b3050a16..7354e96282 100644 --- a/recidiviz/aggregated_metrics/models/aggregated_metric.py +++ b/recidiviz/aggregated_metrics/models/aggregated_metric.py @@ -86,36 +86,38 @@ def unit_of_observation(self) -> MetricUnitOfObservation: def unit_of_observation_type(self) -> MetricUnitOfObservationType: return self.unit_of_observation.type - # TODO(#29291): Simplify this to return just a `str` since there's now only one - # fragment. Consider renaming get_metric_conditions* functions to - # get_observation_conditions*. - def get_metric_conditions(self) -> List[str]: - """Returns a list of conditional query fragments filtering spans or events.""" + def get_observation_conditions_string(self) -> str: + """Returns a query fragment that filters a rows that contain observation data + based on configured observation conditions for this metric. + """ fragment = self.observation_selector.generate_observation_conditions_query_fragment( filter_by_observation_type=True, - # TODO(#29291): Figure out where get_metric_conditions() is being used and - # pass as a parameter through to get_metric_conditions() instead of always - # setting to True here so we can gate the new aggregated metric query code - # properly. + # TODO(#29291): Figure out where get_observation_conditions_string() is + # being used and pass as a parameter through to + # get_observation_conditions_string() instead of always setting to True + # here so we can gate the new aggregated metric query code properly. read_attributes_from_json=True, - # TODO(#29291): Simplify to pass this arg through from the other - # get_metric_conditions* functions. strip_newlines=False, ) - return [f"({fragment})"] - - # TODO(#29291): Write tests for this - def get_metric_conditions_string(self) -> str: - """Returns a query fragment string that joins SQL conditional statements with `AND`.""" - return "\n\t\t\t\tOR\n".join(self.get_metric_conditions()) + return f"({fragment})" - # TODO(#29291): Write tests for this - def get_metric_conditions_string_no_newline(self) -> str: + def get_observation_conditions_string_no_newline(self) -> str: + """Returns a query fragment that filters a rows that contain observation data + based on configured observation conditions for this metric. All newlines are + stripped from the condition string so this can be used in places where we want + more succinct output. """ - Returns a query fragment string that joins SQL conditional statements with `AND` without line breaks - or extra spaces, for more succinct print output. - """ - return re.sub(r" +|\n+", " ", " OR ".join(self.get_metric_conditions())) + fragment = self.observation_selector.generate_observation_conditions_query_fragment( + filter_by_observation_type=True, + # TODO(#29291): Figure out where + # get_observation_conditions_string_no_newline() is being used and pass as + # a parameter through to get_observation_conditions_string_no_newline() + # instead of always setting to True here so we can gate the new aggregated + # metric query code properly. + read_attributes_from_json=True, + strip_newlines=True, + ) + return f"({fragment})" @attr.define(frozen=True, kw_only=True, slots=False) @@ -263,7 +265,7 @@ def generate_aggregation_query_fragment( LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}), GREATEST({period_start_date_col}, {span_start_date_col}), DAY) - ) * (IF({self.get_metric_conditions_string()}, 1, 0)) / DATE_DIFF({period_end_date_col}, {period_start_date_col}, DAY) + ) * (IF({self.get_observation_conditions_string()}, 1, 0)) / DATE_DIFF({period_end_date_col}, {period_start_date_col}, DAY) ) AS {self.name} """ @@ -303,7 +305,7 @@ def generate_aggregation_query_fragment( GREATEST({period_start_date_col}, {span_start_date_col}), DAY ) * IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, CAST(JSON_EXTRACT_SCALAR(span_attributes, "$.{self.span_value_numeric}") AS FLOAT64), 0 ) @@ -313,7 +315,7 @@ def generate_aggregation_query_fragment( LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}), GREATEST({period_start_date_col}, {span_start_date_col}), DAY - ) * IF({self.get_metric_conditions_string()}, 1, 0) + ) * IF({self.get_observation_conditions_string()}, 1, 0) ) ) AS {self.name} """ @@ -354,7 +356,7 @@ def generate_aggregation_query_fragment( GREATEST({period_start_date_col}, {span_start_date_col}), DAY ) * IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, ( # Average of LoS on last day (inclusive) of period/span and LoS on first day of period/span (DATE_DIFF( @@ -373,7 +375,7 @@ def generate_aggregation_query_fragment( LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}), GREATEST({period_start_date_col}, {span_start_date_col}), DAY - ) * IF({self.get_metric_conditions_string()}, 1, 0) + ) * IF({self.get_observation_conditions_string()}, 1, 0) ) ) AS {self.name} """ @@ -421,7 +423,7 @@ def generate_aggregation_query_fragment( LEAST({period_end_date_col}, {nonnull_current_date_exclusive_clause(span_end_date_col)}), GREATEST({period_start_date_col}, {span_start_date_col}), DAY) - ) * (IF({self.get_metric_conditions_string()}, 1, 0)) + ) * (IF({self.get_observation_conditions_string()}, 1, 0)) ) AS {self.name} """ @@ -448,7 +450,7 @@ def generate_aggregation_query_fragment( ) -> str: return f""" COUNT(DISTINCT IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="ses")}), NULL )) AS {self.name} @@ -473,7 +475,7 @@ def generate_aggregation_query_fragment( ) -> str: return f""" SUM( - IF({self.get_metric_conditions_string()}, DATE_DIFF( + IF({self.get_observation_conditions_string()}, DATE_DIFF( LEAST( DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY), {nonnull_current_date_exclusive_clause(span_end_date_col)} @@ -507,7 +509,7 @@ def generate_aggregation_query_fragment( return f""" MAX( IF( - {self.get_metric_conditions_string()} + {self.get_observation_conditions_string()} AND {span_start_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY), DATE_DIFF( LEAST( @@ -546,7 +548,7 @@ def generate_aggregation_query_fragment( return f""" AVG( IF( - {self.get_metric_conditions_string()} + {self.get_observation_conditions_string()} AND {assignment_date_col} BETWEEN {span_start_date_col} AND {nonnull_current_date_exclusive_clause(span_end_date_col)}, CAST(JSON_EXTRACT_SCALAR(span_attributes, "$.{self.span_value_numeric}") AS FLOAT64), NULL @@ -608,7 +610,7 @@ def generate_aggregation_query_fragment(self, event_date_col: str) -> str: ) return f""" COUNT(DISTINCT IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, CONCAT( {self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")}, {event_date_col}{event_segmentation_columns_str} @@ -638,7 +640,7 @@ class EventValueMetric(PeriodEventAggregatedMetric): def generate_aggregation_query_fragment(self, event_date_col: str) -> str: return f""" AVG(IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, CAST(JSON_EXTRACT_SCALAR(event_attributes, "$.{self.event_value_numeric}") AS FLOAT64), NULL )) AS {self.name} @@ -660,7 +662,7 @@ class EventDistinctUnitCountMetric(PeriodEventAggregatedMetric): def generate_aggregation_query_fragment(self, event_date_col: str) -> str: return f""" COUNT(DISTINCT IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")}), NULL )) AS {self.name} @@ -687,7 +689,7 @@ def generate_aggregation_query_fragment( MIN(DATE_DIFF( IFNULL( IF( - {self.get_metric_conditions_string()}, + {self.get_observation_conditions_string()}, LEAST({event_date_col}, DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY)), NULL ), DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY)), @@ -715,7 +717,7 @@ def generate_aggregation_query_fragment( return f""" COUNT( DISTINCT IF( - {self.get_metric_conditions_string()} + {self.get_observation_conditions_string()} AND {event_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY), CONCAT({self.unit_of_observation.get_primary_key_columns_query_string(prefix="events")}, {event_date_col}), NULL @@ -741,7 +743,7 @@ def generate_aggregation_query_fragment( ) -> str: return f""" CAST(LOGICAL_OR( - {self.get_metric_conditions_string()} + {self.get_observation_conditions_string()} AND {event_date_col} <= DATE_ADD({assignment_date_col}, INTERVAL {self.window_length_days} DAY) ) AS INT64) AS {self.name}""" diff --git a/recidiviz/outliers/types.py b/recidiviz/outliers/types.py index bdb5eb1260..3efec5b919 100644 --- a/recidiviz/outliers/types.py +++ b/recidiviz/outliers/types.py @@ -118,7 +118,7 @@ def metric_event_conditions_string(self) -> str: """ The query fragment to use to filter analyst_data.person_events for this metric's events """ - return self.aggregated_metric.get_metric_conditions_string_no_newline() + return self.aggregated_metric.get_observation_conditions_string_no_newline() @attr.s diff --git a/recidiviz/tools/analyst/aggregated_metrics_utils.py b/recidiviz/tools/analyst/aggregated_metrics_utils.py index 43ba820b58..c42d85a0b3 100644 --- a/recidiviz/tools/analyst/aggregated_metrics_utils.py +++ b/recidiviz/tools/analyst/aggregated_metrics_utils.py @@ -420,7 +420,7 @@ def get_person_events( LEFT JOIN `normalized_state.state_person_external_id` pei ON e.person_id = pei.person_id AND e.state_code = pei.state_code - WHERE ({' OR '.join(metric.get_metric_conditions())}) + WHERE ({metric.get_observation_conditions_string()}) {officer_ids_filter} AND e.event_date BETWEEN {min_date_str} AND {max_date_str}