Update Models with gfactor Speed #502

Merged
merged 3 commits on Dec 23, 2024
@@ -226,6 +226,15 @@ models:
tests:
- five_minute_daily_count:
group_by_columns: ["detector_id", "sample_date"]
columns:
- name: speed_weighted
description: |
If the detector reports a measured speed (miles/hour) in the lane, then that value will be used.
The reported value is weighted by the number of vehicles in each sample period. If no speed is
reported by the device, then the speed value calculated from the
int_clearinghouse__detector_g_factor_based_speed model will be placed in the corresponding
detector and timestamp row. If there is no device- or g-factor-provided speed, the value will
remain null and be populated using imputation in downstream models.
- name: int_clearinghouse__detector_g_factor_based_speed
description: |
This model calculates the g-factor-based smoothed speed. According to the PeMS documentation, the
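The fallback described in the speed_weighted column above can be summarized with a minimal SQL sketch. The column and CTE names are taken from the diff that follows; the fragment itself is illustrative, not the merged code:

```sql
-- Illustrative sketch of the speed fallback chain:
--   1. device-reported speed (speed_weighted),
--   2. g-factor based speed (imputed_speed),
--   3. otherwise null, left for downstream imputation.
select
    agg.detector_id,
    agg.sample_timestamp,
    coalesce(agg.speed_weighted, gs.imputed_speed) as speed_weighted
from agg
left join gfactor_speed as gs
    on
        agg.detector_id = gs.detector_id
        and agg.sample_timestamp = gs.sample_timestamp
```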
@@ -41,6 +41,15 @@ spine as (
on to_date(ts.timestamp_column) = dd.sample_date
),

-- Add the model where gfactor speed has been calculated
gfactor_speed as (
select
detector_id,
sample_timestamp,
imputed_speed
from {{ ref('int_clearinghouse__detector_g_factor_based_speed') }}
),

/* Join 5-minute aggregated data to the spine to get a table without missing rows */
base as (
select
@@ -59,7 +68,7 @@ base as (
agg.zero_occ_pos_vol_ct,
agg.high_volume_ct,
agg.high_occupancy_ct,
agg.speed_weighted,
coalesce(agg.speed_weighted, gs.imputed_speed) as speed_weighted,
agg.volume_observed,
coalesce(agg.state_postmile, dmeta.state_postmile) as state_postmile,
coalesce(agg.absolute_postmile, dmeta.absolute_postmile) as absolute_postmile,
@@ -88,6 +97,10 @@ base as (
to_date(spine.timestamp_column) < dmeta._valid_to
or dmeta._valid_to is null
)
left join gfactor_speed as gs
on
agg.detector_id = gs.detector_id
and agg.sample_timestamp = gs.sample_timestamp
Contributor

Note: adding another join to this already expensive model doesn't seem to hurt performance too much (see the query profile from CI here), but let's keep our eyes on it.

Contributor Author

The other alternatives we could investigate are creating a new model downstream of int_clearinghouse__detector_agg_five_minutes_with_missing_rows to add the g-factor speed, or possibly adding it to the int_imputation__detector_agg_five_minutes model.
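For concreteness, the first alternative could look roughly like the following hypothetical downstream model; everything except the two ref() targets is assumed for illustration and is not part of this PR:

```sql
-- Hypothetical sketch only: a separate model that layers the g-factor
-- speed onto the aggregated rows instead of joining inside them.
select
    base.detector_id,
    base.sample_timestamp,
    coalesce(base.speed_weighted, gs.imputed_speed) as speed_weighted
from {{ ref('int_clearinghouse__detector_agg_five_minutes_with_missing_rows') }} as base
left join {{ ref('int_clearinghouse__detector_g_factor_based_speed') }} as gs
    on
        base.detector_id = gs.detector_id
        and base.sample_timestamp = gs.sample_timestamp
```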

)

select * from base
@@ -20,7 +20,7 @@ detector_agg as (
occupancy_avg,
speed_weighted,
volume_observed
from {{ ref('int_clearinghouse__detector_agg_five_minutes_with_missing_rows') }}
from {{ ref('int_clearinghouse__detector_agg_five_minutes') }}
where {{ make_model_incremental('sample_date') }}
),

@@ -88,8 +88,7 @@ unimputed as (
-- If the detector_id in the join is not null, it means that the detector
-- is considered to be "good" for a given date.
(good_detectors.detector_id is not null) as detector_is_good,
coalesce(base.speed_weighted, (base.volume_sum * 22) / nullifzero(base.occupancy_avg) * (1 / 5280) * 12)
as speed_five_mins
base.speed_weighted as speed_five_mins
from base
left join good_detectors
on
@@ -23,7 +23,7 @@ five_minute_agg as (
sample_ct,
volume_sum,
occupancy_avg,
speed_five_mins as speed_weighted,
speed_five_mins,
station_type,
absolute_postmile,
volume_imputation_method,
@@ -35,27 +35,6 @@
where {{ make_model_incremental('sample_date') }}
),

aggregated_speed as (
select
*,
--A preliminary speed calculation was developed on 3/22/24
--using an effective vehicle length of 22 feet
--(16 ft vehicle + 6 ft detector zone) and using
--a conversion to get miles per hour (5280 ft / mile and 12
--5-minute intervals in an hour).
--The following code may be used if we want to use speed from raw data
--coalesce(speed_raw, ((volume * 22) / nullifzero(occupancy)
--* (1 / 5280) * 12))
--impute five minutes missing speed
coalesce(speed_weighted, (volume_sum * 22) / nullifzero(occupancy_avg) * (1 / 5280) * 12)
as speed_five_mins,
-- create a boolean flag to track whether speed is imputed or not
coalesce(speed_five_mins != speed_weighted or (speed_five_mins is not null and speed_weighted is null), false)
-- coalesce(speed_weighted is null, false)
as is_speed_calculated
from five_minute_agg
),

vmt_vht_metrics as (
select
*,
@@ -67,7 +46,7 @@
vmt / nullifzero(vht) as q_value,
-- travel time
60 / nullifzero(q_value) as tti
from aggregated_speed
from five_minute_agg
),

delay_metrics as (
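For reference, the fallback removed above multiplies the 5-minute volume by the 22 ft effective vehicle length, divides by occupancy to get feet traveled in the interval, then converts to miles per hour (5280 ft per mile, 12 five-minute intervals per hour). A worked example under assumed inputs:

```sql
-- Assumed inputs: volume_sum = 50 vehicles / 5 min, occupancy_avg = 0.1.
--   (50 * 22) / 0.1  = 11000 ft traveled in 5 minutes
--   11000 / 5280     ≈ 2.083 miles in 5 minutes
--   2.083 * 12       ≈ 25 mph
-- Note: this relies on Snowflake's decimal division; with integer
-- division, 1 / 5280 would truncate to 0.
select (50 * 22) / nullifzero(0.1) * (1 / 5280) * 12 as speed_five_mins;
```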