This repository has been archived by the owner on Jun 30, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
readShips.R
71 lines (66 loc) · 1.62 KB
/
readShips.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
library(dplyr)
library(geosphere)
library(readr)
# Load the per-ship summary table, using `rds` as an on-disk cache.
#
# csv: path to the raw ship data; only read when the cache file is missing.
# rds: path to the cache file; read if it exists, otherwise created.
#
# Returns the cached object when `rds` exists, otherwise the freshly
# computed table (which is also saved to `rds` before returning).
readShips <- function(csv, rds) {
  # The existence check and the subsequent read/write are not atomic, so two
  # processes sharing the same cache file could race. That is acceptable as
  # long as only a single app instance is running.
  if (!file.exists(rds)) {
    message("Reading raw ship data")
    ships <- .uniqueNames(.maxDistancePerShip(.readShipsCsv(csv)))
    saveRDS(ships, rds)
    return(ships)
  }

  message("Reading precomputed ship data")
  readRDS(rds)
}
# Read the raw ship CSV, keeping only the six columns the app needs and
# giving them short lower-case names.
#
# csv: path (or anything else read_csv accepts) of the raw data.
#
# Returns a tibble with columns type (factor), name and id (character),
# time (date-time), lat and lon (double), in that order.
.readShipsCsv <- function(csv) {
  # cols_only() makes read_csv skip every column not listed here.
  wanted <- cols_only(
    ship_type = col_factor(),
    SHIPNAME = col_character(),
    SHIP_ID = col_character(),
    DATETIME = col_datetime(),
    LAT = col_double(),
    LON = col_double()
  )
  raw <- read_csv(csv, col_types = wanted)

  # select() both renames the columns and fixes their order.
  select(
    raw,
    type = "ship_type",
    name = "SHIPNAME",
    id = "SHIP_ID",
    time = "DATETIME",
    lat = "LAT",
    lon = "LON"
  )
}
# Reduce the observations of every ship to the rows produced by
# .maxDistance() for that ship.
#
# df: data frame with at least the columns type, name and id, plus whatever
#     .maxDistance() needs.
#
# Returns an ungrouped data frame holding the grouping columns (type, name,
# id) followed by the columns returned by .maxDistance().
.maxDistancePerShip <- function(df) {
  perShip <- group_by(df, type, name, id)
  # group_modify() hands each group's rows (without the grouping columns) and
  # a one-row key to the function; only the rows are needed here.
  longest <- group_modify(perShip, function(rows, key) .maxDistance(rows))
  ungroup(longest)
}
# Make every (type, name) pair identify exactly one ship: when several
# distinct ids share the same type and name, the id is appended to the name
# (separated by a space).
#
# df: data frame with the columns type, name and id.
#
# Returns the data frame with the adjusted name column. Because of
# `.keep = "unused"`, the helper count `n` and the `id` column, both consumed
# by the final mutate(), are not part of the result.
.uniqueNames <- function(df) {
  byTypeName <- group_by(df, type, name)
  # Number of different ids sharing each (type, name) combination.
  counted <- ungroup(mutate(byTypeName, n = n_distinct(id)))
  mutate(
    counted,
    name = if_else(n > 1, paste(name, id), name),
    .keep = "unused"
  )
}
# Find the longest leg between two consecutive observations of one ship.
#
# df: observations of a single ship with the columns time, lat and lon.
#
# Returns a data frame with the columns lat1, lon1 (start of the leg), lat2,
# lon2 (end of the leg) and dist (distGeo() distance between them; meters
# with distGeo's default ellipsoid). When several legs tie for the maximum,
# the chronologically last one is kept.
.maxDistance <- function(df) {
  byTime <- arrange(df, time)

  # Pair each observation with its successor: dropping the last row gives the
  # leg starts, dropping the first row gives the matching leg ends.
  legStart <- select(head(byTime, -1), lat1 = lat, lon1 = lon)
  legEnd <- select(tail(byTime, -1), lat2 = lat, lon2 = lon)
  legs <- mutate(
    bind_cols(legStart, legEnd),
    dist = distGeo(cbind(lon1, lat1), cbind(lon2, lat2))
  )

  # slice_max() keeps all tied rows in time order; slice_tail() then picks
  # the most recent of them.
  slice_tail(slice_max(legs, dist))
}