-
Notifications
You must be signed in to change notification settings - Fork 1
/
kungfu.sql
153 lines (152 loc) · 4.35 KB
/
kungfu.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
-- assuming the telemetry.csv file in this repo has been imported
-- build a common table expression [cte] that returns it's records,
-- classifies each row's speed based on thresholds [configurable],
-- and determines the last_timestamp for each row.
-- put placeholders in for lat/lng fields that are present
-- in normal telemetry but were skipped for this example
with cte_telemetry as (
select
*,
null as lat,
null as lng,
case
when speed >= 150 then "TOOFAST"
when speed >= 110 then "FAST"
when speed == 0 then "STOPPED"
when speed <= 40 then "SLOW"
else "OK"
end as state,
lag(timestamp) over (
partition by vehicleid
order by
timestamp
) as last_timestamp
from
telemetry
order by
vehicleid,
timestamp
),
-- then, determine each rows previous state provided the previous row was within 3 minutes [configurable]
cte_telemetry_in_context as (
select
*,
if (
(
unix_timestamp(timestamp) - unix_timestamp(last_timestamp)
) < 180,
-- 3 minutes
lag(state) over (
partition by vehicleid
order by
timestamp asc
),
null
) as prev_state
from
cte_telemetry
),
-- then, determine if a state change has occurred
cte_telemetry_state_changed as (
select
VehicleID,
lat,
lng,
Timestamp,
Speed,
state,
prev_state,
if(
(prev_state is null)
or (state != prev_state),
1,
0
) as state_changed
from
cte_telemetry_in_context
),
-- and assign a state change identifier
cte_telemetry_with_state_change_id as (
select
*,
sum(state_changed) over (
partition by vehicleid
order by
timestamp asc
) as state_change_id
from
cte_telemetry_state_changed
),
-- use the vehicle id and state change identifier to work out some metrics and identify the row numbers
-- also create a state_group_id so that this can be reused to visualize all the lat/lngs for a particular vehicle's state at a specific time
cte_telemetry_with_row_num as (
select
vehicleid || '_' || unix_timestamp(timestamp) || '_' || state_change_id as state_group_id,
*,
row_number() over (
partition by vehicleid,
state_change_id
order by
timestamp desc
) as rn,
round(avg(speed) over (
partition by vehicleid,
state_change_id
order by
vehicleid,
timestamp
), 2) as avg_speed_in_state,
max(speed) over (
partition by vehicleid,
state_change_id
order by
vehicleid,
timestamp
) as max_speed_in_state,
min(timestamp) over (
partition by vehicleid,
state_change_id
order by
vehicleid,
timestamp
) as state_timestamp_start
from
cte_telemetry_with_state_change_id
order by
vehicleid,
timestamp
),
-- calculate how many seconds the vehicle was in a state and only focus on the row before a state change
cte_telemetry_state_durations as (
select
state_group_id,
state_timestamp_start as state_start,
timestamp as state_end,
vehicleid as vehicle_id,
state,
avg_speed_in_state,
max_speed_in_state,
unix_timestamp(timestamp) - unix_timestamp(state_timestamp_start) as state_duration_in_seconds,
lat as end_lat,
lng as end_lng
from
cte_telemetry_with_row_num
where
rn = 1
)
-- and finally, use all of that to run normal queries on
-- such as show telemetry durations for particular states, between timespans, by vehicle number, speed, and/or sustained state duration
select
*
from
cte_telemetry_state_durations
where
state in ("FAST", "TOOFAST")
order by state_start;
-- ideas for future possibilities:
-- use the state_group_id to plot on a map
-- identify convergence of vehicles, sequence of reinforcements, time-to-scene, first-on-scene
-- if telemetry was real-time, understand proxmity of deployed resources and posture of reserve resources
-- supplement data with other data sources such as vehicle / crew capability / speciality [e.g. K-9, etc.], or commslogs, or instrument metrics [lights state, sirens state, etc]
-- adjust thresholds to be tuned to the specifics of road the vehicle is on based on lat/lng [e.g. residential vs. school zone vs. rural road, etc]
-- consider traffic density as a function of time of day and impact of vehicle presence on traffic patterns?