diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ceb34c6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +workshop.sql +*.pdf +*.epub +*.html \ No newline at end of file diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index bd83f85..dbc56f9 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -1,61 +1,48 @@ # MobilityDB Workshop Contribution Guide -MobilityDB uses DocBook v4.5 to generate workshop guides. The instructions below provide a step-by-step guide to convert from a Notion markdown export to the XML format required to successfully compile using `dblatex`. The process uses a tool called `pandoc` to convert into DocBook v5 and then some manual editing to work with v4.5 of DocBook. +MobilityDB uses DocBook v4.5 to generate workshop guides. The instructions below provide a step-by-step guide to contribute to the workshop guide. ## Conventions - Images are in `.png` format and use pascal case (e.g. `VeryImportantImage.png`) for filename. -- XML files use lowercase underscores (e.g. `my_filename.xml`)and acronyms remain uppercase (e.g. `my_AWS_example.xml`) +- XML files use lowercase underscores (e.g. `my_filename.xml`) and acronyms remain uppercase (e.g. `my_AWS_example.xml`) -## Start +## Contribution Workflow Steps are as follows: -- Export from notion to Markdown -- Change folder to "images" -- Rename file to AIS_Dashboard - -IN MARKDOWN -Find and replace image folder `(Dashboard%20and%20Visualization%20of%20Ship%20Trajectories%20(%20845afa91ff30470181ea3a0b5ddf08b5/` with `(images/` -Find and replace `%20` with `" "`. Refactor all image file names (use caption as image name, refactor name + pascal case) -Remove initial heading -Remove all caption duplicate text - -### Convert Markdown to DocBook v5 - -From terminal run the following commands. Command arguments: -- `FlightDataDashboard.md` -> The name of your notion markdown file. -- `FlightDataDashboard.xml` -> Name of the output file. -```bash -cd -pandoc FlightDataDashboard.md -f markdown -t docbook5 -s -o FlightDataDashboard.xml +- Usually, the latest version of the workshop guide is in the `develop` branch. Refer always to this branch. +- Fork the repository and clone it locally. +- Create a new branch from `develop` and name it `feature/`. For example, 'feature/correcting_typos'. +- Make your changes and commit them to your branch. +- Push your branch to your forked repository. +- Create a pull request from your branch to the `develop` branch of the main repository. Please, make sure to detail your changes in the pull request description. A nice way to do this is to use the following template: + + ```markdown + Changes: + - Fix typos + - Add new section + - etc. + ``` + +## Changing the Workshop Guide + +If you want to make changes to the guide, you will need to edit the xml file corresponding to the chapter you want to change. For example, if you want to propose a change to the `AIS_Dashboard` chapter, you will need to edit the `doc/AIS_Dashboard.xml` file. + +## Adding a New Chapter + +If you want to add a new chapter to the guide, you will need to create a new xml file in the `doc` folder. For example, if you want to add a new chapter called `My_New_Chapter`, you will need to create a new file called `doc/My_New_Chapter.xml`. + +If you need to insert any images in the new chapter, you will need to add them to the `doc/images` folder. For example, if you want to add an image called `My_New_Image.png`, you will need to add it to the `doc/images` folder. + +You will also need to add the following line to the `mobilitydb-workshop.xml` file: + +```xml + ``` -## In Generated XML - -- Delete header information `` -- Insert `` at beginning -- Insert `Dashboard and Visualization of Ship Trajectories (AIS)` -- Remove `` at end -- Insert `` at end -- Find/replace `` with `` -- Find/replace `` - - `
` becomes `
` -- Replace `<` with `<` -- Replace `>` with `>` - -COPY AIS_Dashboard.xml into parent doc folder -COPY all images from the notion export folder into the doc/images folder - -IN mobilitydb-workshop.xml -ADD at header -ADD &FlightDataDashboard; at end - -# In Terminal -```bash -cd .. -dblatex -s texstyle.sty -T native -t pdf -o mobilitydb-workshop.pdf mobilitydb-workshop.xml +Finally, you will need to add the following line to the `mobilitydb-workshop.xml` file, in the appropriate place: + +```xml +&My_New_Chapter; ``` -TODO: change image filename \ No newline at end of file +This will include the new chapter in the guide. \ No newline at end of file diff --git a/doc/AIS.xml b/doc/AIS.xml index c95544e..fc9557a 100644 --- a/doc/AIS.xml +++ b/doc/AIS.xml @@ -36,7 +36,7 @@
Data - The Danish Maritime Authority publishes about 3 TB of AIS routes in CSV format here. The columns in the CSV are listed in . This module uses the data of one day April 1st 2018. The CSV file size is 1.9 GB, and it contains about 10 M rows. + The Danish Maritime Authority publishes about 3 TB of AIS routes in CSV format here. The columns in the CSV are listed in . This module uses the data of one day June 1st 2023. The CSV file size is 582 MB, and it contains more than 11 M rows. AIS columns @@ -81,7 +81,7 @@ The tools used in this module are as follows: - MobilityDB, on top of PostgreSQL and PostGIS. Here I use the MobilityDB docker image. + MobilityDB, on top of PostgreSQL and PostGIS. Although you can use a docker image, we recommend to install MobilityDB on your system, either from binaries or from sources. QGIS @@ -144,11 +144,18 @@ COPY AISInput(T, TypeOfMobile, MMSI, Latitude, Longitude, NavigationalStatus, ROT, SOG, COG, Heading, IMO, CallSign, Name, ShipType, CargoType, Width, Length, TypeOfPositionFixingDevice, Draught, Destination, ETA, DataSourceType, SizeA, SizeB, SizeC, SizeD) -FROM '/home/mobilitydb/DanishAIS/aisdk_20180401.csv' DELIMITER ',' CSV HEADER; +FROM '/home/mobilitydb/DanishAIS/aisdk-2023-06-01.csv' DELIMITER ',' CSV HEADER; - This import took about 3 minutes on my machine, which is an average laptop. The CSV file has 10,619,212 rows, all of which were correctly imported. For bigger datasets, one could alternative could use the program pgloader. + It is possible that the above command fails with a permission error. The reason for this is that COPY is a server capability, while the CSV file is on the client side. To overcome this issue, one can use the \copy command of psql as follows: + +psql -d DanishAIS -c "\copy AISInput(T, TypeOfMobile, MMSI, Latitude, Longitude, NavigationalStatus, ROT, SOG, COG, Heading, IMO, CallSign, Name, ShipType, CargoType, Width, Length, TypeOfPositionFixingDevice, Draught, Destination, ETA, DataSourceType, SizeA, SizeB, SizeC, SizeD) FROM '/home/mobilitydb/DanishAIS/aisdk-2023-06-01.csv' DELIMITER ',' CSV HEADER;" + + In addition, if you downloaded the CSV file from this repo's data , then you will need to add the column 'geom' to the command. + + + This import took about 1 minute and 30 seconds on my machine, which is a development laptop. The CSV file has 11,809,593 rows, all of which were correctly imported. For bigger datasets, one could alternative could use the program pgloader. We clean up some of the fields in the table and create spatial points with the following command. @@ -163,7 +170,7 @@ UPDATE AISInput SET - This took about 5 minutes on my machine. Let's visualize the spatial points on QGIS. + The above query took around 1.5 min on my desktop. Let's visualize the spatial points on QGIS.
Visualizing the input points @@ -187,9 +194,9 @@ CREATE TABLE AISInputFiltered AS SELECT DISTINCT ON(MMSI, T) * FROM AISInput WHERE Longitude BETWEEN -16.1 AND 32.88 AND Latitude BETWEEN 40.18 AND 84.17; --- Query returned successfully: 10357703 rows affected, 01:14 minutes execution time. +-- Query returned successfully: 11545496 rows affected, 00:45 minutes execution time. SELECT COUNT(*) FROM AISInputFiltered; ---10357703 +--11545496 @@ -201,26 +208,29 @@ SELECT COUNT(*) FROM AISInputFiltered; -CREATE TABLE Ships(MMSI, Trip, SOG, COG) AS -SELECT MMSI, - tgeompoint_contseq(array_agg(tgeompoint(ST_Transform(Geom, 25832), T) ORDER BY T)), - tfloat_contseq(array_agg(tfloat(SOG, T) ORDER BY T) FILTER (WHERE SOG IS NOT NULL)), - tfloat_contseq(array_agg(tfloat(COG, T) ORDER BY T) FILTER (WHERE COG IS NOT NULL)) -FROM AISInputFiltered +CREATE TABLE Ships(MMSI, Trip, SOG, COG) AS +SELECT MMSI, + tgeompoint_seq(array_agg(tgeompoint_inst(ST_Transform(Geom, 25832), T) ORDER BY T)), + tfloat_seq(array_agg(tfloat_inst(SOG, T) ORDER BY T) FILTER (WHERE SOG IS NOT NULL)), + tfloat_seq(array_agg(tfloat_inst(COG, T) ORDER BY T) FILTER (WHERE COG IS NOT NULL)) +FROM AISInputFiltered GROUP BY MMSI; --- Query returned successfully: 2995 rows affected, 01:16 minutes execution time. +-- Query returned successfully: 6264 rows affected, 00:52 minutes execution time. - This query constructs, per ship, its spatiotemporal trajectory Trip, and two temporal attributes SOG and COG. Trip is a temporal geometry point, and both SOG and COG are temporal floats. MobilityDB builds on the coordinate transformation feature of PostGIS. Here the SRID 25832 (European Terrestrial Reference System 1989) is used, because it is the one advised by Danish Maritime Authority in the download page of this dataset. Now, let's visualize the constructed trajectories in QGIS. + This query constructs, per ship, its spatiotemporal trajectory Trip, and two temporal attributes SOG and COG. Trip is a temporal geometry point, and both SOG and COG are temporal floats. MobilityDB builds on the coordinate transformation feature of PostGIS. Here the SRID 25832 (European Terrestrial Reference System 1989) is used, because it is the one advised by Danish Maritime Authority in the download page of this dataset. shows the constructed trajectories in QGIS. ALTER TABLE Ships ADD COLUMN Traj geometry; UPDATE Ships SET Traj = trajectory(Trip); --- Query returned successfully: 2995 rows affected, 3.8 secs execution time. +-- Query returned successfully: 6264 rows affected, 3.8 secs execution time. + + shows the constructed trajectories in QGIS. Notice that there are still significant errors in the data, in particular the vertical lines. These errors need to be corrected so that the analytical queries in the following sections return more accurate results. We do not cope with this issue here, since the topic of trajectory cleaning is beyond the scope of this introductory workshop. +
Visualizing the ship trajectories @@ -238,7 +248,7 @@ UPDATE Ships SET Traj = trajectory(Trip); SELECT SUM(length(Trip)) FROM Ships; --- 500433519.121321 +-- 807319558.5805709 @@ -247,48 +257,45 @@ SELECT SUM(length(Trip)) FROM Ships; WITH buckets (bucketNo, RangeKM) AS ( - SELECT 1, floatrange '[0, 0]' UNION - SELECT 2, floatrange '(0, 50)' UNION - SELECT 3, floatrange '[50, 100)' UNION - SELECT 4, floatrange '[100, 200)' UNION - SELECT 5, floatrange '[200, 500)' UNION - SELECT 6, floatrange '[500, 1500)' UNION - SELECT 7, floatrange '[1500, 10000)' ), + SELECT 1, floatspan '[0, 0]' UNION + SELECT 2, floatspan '(0, 50)' UNION + SELECT 3, floatspan '[50, 100)' UNION + SELECT 4, floatspan '[100, 200)' UNION + SELECT 5, floatspan '[200, 500)' UNION + SELECT 6, floatspan '[500, 1500)' UNION + SELECT 7, floatspan '[1500, 10000)' ), histogram AS ( SELECT bucketNo, RangeKM, count(MMSI) as freq - FROM buckets left outer join Ships on (length(Trip)/1000) <@ RangeKM + FROM buckets left outer join Ships on (length(Trip)/1000) <@ RangeKM GROUP BY bucketNo, RangeKM ORDER BY bucketNo, RangeKM ) SELECT bucketNo, RangeKM, freq, - repeat('▪', ( freq::float / max(freq) OVER () * 30 )::int ) AS bar + repeat('▪', ( freq::float / max(freq) OVER () * 30 )::int ) AS bar FROM histogram; --- Total query runtime: 5.6 secs +-- Total query runtime: 2.8 secs -bucketNo, bucketRange, freq bar -1; "[0,0]"; 303; ▪▪▪▪▪ -2; "(0,50)"; 1693; ▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ -3; "[50,100)"; 267; ▪▪▪▪▪ -4; "[100,200)"; 276; ▪▪▪▪▪ -5; "[200,500)"; 361; ▪▪▪▪▪▪ -6; "[500,1500)"; 86; ▪▪ -7; "[1500,10000)"; 6; + bucketno | rangekm | freq | bar +----------+---------------+------+-------------------------------- + 1 | [0, 0] | 796 | ▪▪▪▪▪▪ + 2 | (0, 50) | 3752 | ▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪▪ + 3 | [50, 100) | 571 | ▪▪▪▪▪ + 4 | [100, 200) | 452 | ▪▪▪▪ + 5 | [200, 500) | 569 | ▪▪▪▪▪ + 6 | [500, 1500) | 100 | ▪ + 7 | [1500, 10000) | 22 | +(7 rows) - Surprisingly there are trips with zero length. These are clearly noise that can be deleted. Also there are very many short trips, that are less than 50 km long. On the other hand, there are few long trips that are more than 1,500 km long. Let's visualize these last two cases in . They look like noise. Normally one should validate more, but to simplify this module, we consider them as noise, and delete them. + Surprisingly there are trips with zero length. These are clearly noise that can be deleted. Also there are very many short trips, that are less than 50 km long. On the other hand, there are few long trips that are more than 1,500 km long. They look like noise. Normally one should validate more, but to simplify this module, we consider them as noise, and delete them. DELETE FROM Ships WHERE length(Trip) = 0 OR length(Trip) >= 1500000; --- Query returned successfully in 7 secs 304 msec. +-- DELETE 820 Now the Ships table looks like . -
Visualizing trips with abnormal lengths - - - -
Ship trajectories after filtering @@ -298,39 +305,22 @@ WHERE length(Trip) = 0 OR length(Trip) >= 1500000; Let's have a look at the speed of the ships. There are two speed values in the data; the speed calculated from the spatiotemporal trajectory speed(Trip), and the SOG attribute. Optimally, the two will be the same. A small variance would still be OK, because of sensor errors. Note that both are temporal floats. In the next query, we compare the averages of the two speed values for every ship: SELECT ABS(twavg(SOG) * 1.852 - twavg(speed(Trip))* 3.6 ) SpeedDifference -FROM Ships +FROM Ships WHERE SOG IS NOT NULL AND + ABS(twavg(SOG) * 1.852 - twavg(speed(Trip))* 3.6 ) > 10.0 ORDER BY SpeedDifference DESC; --- Total query runtime: 8.2 secs --- 990 rows retrieved. -SpeedDifference -NULL -NULL -NULL -NULL -NULL -107.861100067879 -57.1590253627668 -42.4207839833568 -39.5819188407125 -33.6182789410313 -30.9078594633161 -26.514042447366 -22.1312646226031 -20.5389022294181 -19.8500569368283 -19.4134688682774 -18.180139457754 -17.4859077178001 -17.3155991287105 -17.1739822139821 -12.9571603234404 -12.6195380496344 -12.2714437568609 -10.9619033557275 -10.4164745930929 -10.3306155308426 -9.46457823214455 + speeddifference +------------------- + 241.42049907716665 + 134.61257387558112 + 112.36643046964278 + 110.10490793777619 + 81.66118732332465 + 81.5725669336415 + 69.85832834619002 + 57.232404771295045 + 52.943341619001586 + 52.921746684116904 ... @@ -338,7 +328,11 @@ NULL The twavg computes a time-weighted average of a temporal float. It basically computes the area under the curve, then divides it by the time duration of the temporal float. By doing so, the speed values that remain for longer durations affect the average more than those that remain for shorter durations. Note that SOG is in knot, and Speed(Trip) is in m/s. The query converts both to km/h. - The query shows that 26 out of the 990 ship trajectories in the table have a difference of more than 10 km/h or NULL. These trajectories are shown in . Again they look like noise, so we remove them. + The query shows the first 10 ship trajectories of the 82 in the table that have a difference of more than 10 km/h. These trajectories are shown in . Again they look like noise, so we remove them with the following query + +DELETE FROM Ships +WHERE ABS(twavg(SOG) * 1.852 - twavg(speed(Trip))* 3.6 ) > 10; +
Ship trajectories with big difference between <varname>speed(Trip)</varname> and <varname>SOG</varname> @@ -348,35 +342,28 @@ NULL Now we do a similar comparison between the calculated azimuth from the spatiotemporal trajectory, and the attribute COG: -SELECT ABS(twavg(COG) - twavg(azimuth(Trip)) * 180.0/pi() ) AzimuthDifference -FROM Ships +SELECT ABS(twavg(COG) - twavg(azimuth(Trip)) * 180.0/pi()) AzimuthDifference +FROM Ships +WHERE ABS(twavg(COG) - twavg(azimuth(Trip)) * 180.0/pi()) > 45.0 ORDER BY AzimuthDifference DESC; --- Total query runtime: 4.0 secs --- 964 rows retrieved. -264.838740787458 -220.958372832234 -180.867071483688 -178.774337481463 -154.239639388087 -139.633953692907 -137.347542674865 -128.239459879571 -121.107566199195 -119.843262642657 -116.685117326047 -116.010477588934 -109.830338231363 -106.94301191915 -106.890186229337 -106.55297972109 -103.20192549283 -102.585009756697 + azimuthdifference +-------------------- + 355.4200584570843 + 348.213417943632 + 333.7458943572906 + 321.5644829906112 + 309.6935360677792 + 308.4444213637132 + 295.5019204058323 + 294.7215887580075 + 267.8656764337898 + 267.09343294055583 ... - Here we see that the COG is not as accurate as the SOG attribute. More than 100 trajectories have an azimuth difference bigger than 45 degrees. visualizes them. Some of them look like noise, but some look fine. For simplicity, we keep them all. + Here we see that the COG is not as accurate as was the case for the SOG attribute. More than 1600 trajectories have an azimuth difference bigger than 45 degrees. visualizes them. Some of them look like noise, but some look fine. For simplicity, we keep them all.
Ship trajectories with big difference between <varname>azimuth(Trip)</varname> and <varname>COG</varname> @@ -388,7 +375,7 @@ ORDER BY AzimuthDifference DESC;
Analyzing the Trajectories - Now we dive into MobilityDB and explore more of its functions. In , we notice trajectories that keep going between Rødby and Puttgarden. Most probably, these are the ferries between the two ports. The task is simply to spot which Ships do so, and to count how many one way trips they did in this day. This is expressed in the following query: + Now we dive into MobilityDB and explore more of its functions. In , we notice trajectories that keep going between Rødby and Puttgarden. Most probably, these are the ferries between the two ports. The task is simply to spot which ships do so, and to count how many one way trips they did in this day. This is expressed in the following query: @@ -396,11 +383,10 @@ CREATE INDEX Ships_Trip_Idx ON Ships USING GiST(Trip); WITH Ports(Rodby, Puttgarden) AS ( SELECT ST_MakeEnvelope(651135, 6058230, 651422, 6058548, 25832), - ST_MakeEnvelope(644339, 6042108, 644896, 6042487, 25832) -) + ST_MakeEnvelope(644339, 6042108, 644896, 6042487, 25832) ) SELECT S.*, Rodby, Puttgarden FROM Ports P, Ships S -WHERE intersects(S.Trip, P.Rodby) AND intersects(S.Trip, P.Puttgarden) +WHERE eintersects(S.Trip, P.Rodby) AND eintersects(S.Trip, P.Puttgarden); -- Total query runtime: 462 msec -- 4 rows retrieved. @@ -416,23 +402,23 @@ WHERE intersects(S.Trip, P.Rodby) AND intersects(S.Trip, P.Puttgarden)
- This query creates two envelope geometries that represent the locations of the two ports, then intersects them with the spatiotemporal trajectories of the ships. The intersects function checks whether a temporal point has ever intersects a geometry. To speed up the query, a spatiotemporal GiST index is first built on the Trip attribute. The query identified four Ships that commuted between the two ports, . To count how many one way trips each of them did, we extend the previous query as follows: + This query creates two envelope geometries that represent the locations of the two ports, then intersects them with the spatiotemporal trajectories of the ships. The eintersects function checks whether a temporal point ever intersects a geometry. To speed up the query, a spatiotemporal GiST index is first built on the Trip attribute. The query identified four Ships that commuted between the two ports, . To count how many one way trips each of them did, we extend the previous query as follows: WITH Ports(Rodby, Puttgarden) AS ( SELECT ST_MakeEnvelope(651135, 6058230, 651422, 6058548, 25832), - ST_MakeEnvelope(644339, 6042108, 644896, 6042487, 25832) -) + ST_MakeEnvelope(644339, 6042108, 644896, 6042487, 25832) ) SELECT MMSI, (numSequences(atGeometry(S.Trip, P.Rodby)) + numSequences(atGeometry(S.Trip, P.Puttgarden)))/2.0 AS NumTrips FROM Ports P, Ships S -WHERE intersects(S.Trip, P.Rodby) AND intersects(S.Trip, P.Puttgarden) --- Total query runtime: 1.1 secs +WHERE eintersects(S.Trip, P.Rodby) AND eintersects(S.Trip, P.Puttgarden); -MMSI NumTrips -219000429; 24.0 -211188000; 24.0 -211190000; 25.0 -219000431; 16.0 + mmsi | numtrips +-----------+--------------------- + 211188000 | 22.0000000000000000 + 211190000 | 22.0000000000000000 + 219000429 | 24.0000000000000000 + 219000431 | 24.0000000000000000 +(4 rows) @@ -444,18 +430,15 @@ MMSI NumTrips WITH B(Belt) AS ( - SELECT ST_MakeEnvelope(640730, 6058230, 654100, 6042487, 25832) -), + SELECT ST_MakeEnvelope(640730, 6058230, 654100, 6042487, 25832) ), BeltShips AS ( SELECT MMSI, atGeometry(S.Trip, B.Belt) AS Trip, trajectory(atGeometry(S.Trip, B.Belt)) AS Traj FROM Ships S, B - WHERE intersects(S.Trip, B.Belt) -) + WHERE eintersects(S.Trip, B.Belt) ) SELECT S1.MMSI, S2.MMSI, S1.Traj, S2.Traj, shortestLine(S1.trip, S2.trip) Approach FROM BeltShips S1, BeltShips S2 -WHERE S1.MMSI > S2.MMSI AND -dwithin(S1.trip, S2.trip, 300) +WHERE S1.MMSI > S2.MMSI AND edwithin(S1.trip, S2.trip, 300); -- Total query runtime: 28.5 secs -- 7 rows retrieved. diff --git a/doc/AIS_Dashboard.xml b/doc/AIS_Dashboard.xml index 56dc035..bd8e714 100644 --- a/doc/AIS_Dashboard.xml +++ b/doc/AIS_Dashboard.xml @@ -74,6 +74,7 @@ Connect to Data Source] section below. + Alternatively, you can set up your own MobilityDB database as described in the previous module. The raw data in CSV format is also available on the MobilityDB-workshop repository. @@ -96,7 +97,7 @@ macOS - + brew update brew install grafana brew services start grafana @@ -118,7 +119,7 @@ sudo apt-get install -y software-properties-common wget wget -q -O - https://packages.grafana.com/gpg.key | sudo apt-key add - # Add repository for stable releases -echo "deb https://packages.grafana.com/enterprise/deb stable main" && +echo "deb https://packages.grafana.com/enterprise/deb stable main" | sudo tee -a /etc/apt/sources.list.d/grafana.list # Install Grafana @@ -217,7 +218,7 @@ sudo apt-get install grafana-enterprise
Speed of Individual Ships - Let’s visualize the speed of the ships using the previously + Let's visualize the speed of the ships using the previously built query. Here we will represent it as a statistic with a color gradient. @@ -623,7 +624,7 @@ SELECT P.port_name, p.lng FROM ports AS P, Ships AS S -WHERE intersects(S.Trip, P.port_geom) +WHERE eintersects(S.Trip, P.port_geom) GROUP BY P.port_name, P.lat, P.lng @@ -719,7 +720,7 @@ WITH TimeShips AS ( SELECT MMSI, - atPeriod(S.Trip, period '[2018-01-04 01:00:00, 2018-01-04 06:30:00)' ) AS trip + atTime(S.Trip, tstzspan '[2018-01-04 01:00:00, 2018-01-04 06:30:00)' ) AS trip FROM Ships S ), @@ -738,7 +739,7 @@ WITH TimeShips S1, TimeShips S2 WHERE S1.MMSI > S2.MMSI AND - dwithin(S1.Trip, S2.Trip, 300) + edwithin(S1.Trip, S2.Trip, 300) ) -- The final SELECT is used to project the time_at_closest_distance onto the sequence of -- locations to return the lat and long of both ships. @@ -1015,7 +1016,7 @@ SELECT P.port_name, p.lat, p.lng FROM ports AS P, Ships AS S -WHERE intersects(S.Trip, P.port_geom) +WHERE eintersects(S.Trip, P.port_geom) GROUP BY P.port_name, P.lat, P.lng @@ -1024,12 +1025,12 @@ GROUP BY P.port_name, P.lat, P.lng sum -(numSequences(atGeometry( atPeriod(S.Trip, period '[$FromTime, $ToTime)'), P.port_geom))) +(numSequences(atGeometry( atTime(S.Trip, tstzspan '[$FromTime, $ToTime)'), P.port_geom))) AS trips_intersect_with_port Essentially we just wrapped S.Trip with - atPeriod() and passed our custom period range. + atTime() and passed our custom tstzspan range. The full query with this modification is below: @@ -1045,13 +1046,13 @@ WITH ports(port_name, port_geom, lat, lng) 54.64, 11.36)) AS p(port_name, port_geom, lat, lng)) SELECT P.port_name, - sum(numSequences(atGeometry(atPeriod(S.Trip, period '[$FromTime, $ToTime)'), + sum(numSequences(atGeometry(atTime(S.Trip, tstzspan '[$FromTime, $ToTime)'), P.port_geom))) AS trips_intersect_with_port, p.lat, p.lng FROM ports AS P, Ships AS S -WHERE intersects(S.Trip, P.port_geom) +WHERE eintersects(S.Trip, P.port_geom) GROUP BY P.port_name, P.lat, P.lng diff --git a/doc/FlightDataDashboard.xml b/doc/FlightDataDashboard.xml index 0aeae24..ffe9ef8 100644 --- a/doc/FlightDataDashboard.xml +++ b/doc/FlightDataDashboard.xml @@ -96,9 +96,9 @@
Preparing the Database - Dataset + The opensky data can be found in this Dataset link - + . Create a new database opensky, then use your SQL @@ -137,7 +137,9 @@ CREATE TABLE flights( Load the data into the database using the following command. Replace the <path_to_file> with the - actual path of the CSV file. Do this for all files. + actual path of the CSV file. Do this for all files. As before, if this command + throws a permission error, you can use the \copy command + from the psql shell. COPY flights(et, icao24, lat, lon, velocity, heading, @@ -923,26 +925,25 @@ CREATE TABLE flight_traj(icao24, callsign, flight_period, trip, velocity, headin AS -- callsign sequence unpacked into rows to split all other temporal sequences. WITH airframe_traj_with_unpacked_callsign AS - (SELECT icao24, - trip, - velocity, - heading, - vertrate, - squawk, - geoaltitude, - startValue(unnest(segments(callsign))) AS start_value_callsign, - unnest(segments(callsign))::period AS callsign_segment_period - FROM airframe_traj) - -SELECT icao24 AS icao24, - start_value_callsign AS callsign, - callsign_segment_period AS flight_period, - atPeriod(trip, callsign_segment_period) AS trip, - atPeriod(velocity, callsign_segment_period) AS velocity, - atPeriod(heading, callsign_segment_period) AS heading, - atPeriod(vertrate, callsign_segment_period) AS vertrate, - atPeriod(squawk, callsign_segment_period) AS squawk, - atPeriod(geoaltitude, callsign_segment_period) AS geoaltitude + (SELECT icao24, + trip, + velocity, + heading, + vertrate, + squawk, + geoaltitude, + startValue(unnest(segments(callsign))) AS start_value_callsign, + unnest(segments(callsign))::tstzspan AS callsign_segment_period + FROM airframe_traj) +SELECT icao24 AS icao24, + start_value_callsign AS callsign, + callsign_segment_period AS flight_period, + atTime(trip, callsign_segment_period) AS trip, + atTime(velocity, callsign_segment_period) AS velocity, + atTime(heading, callsign_segment_period) AS heading, + atTime(vertrate, callsign_segment_period) AS vertrate, + atTime(squawk, callsign_segment_period) AS squawk, + atTime(geoaltitude, callsign_segment_period) AS geoaltitude FROM airframe_traj_with_unpacked_callsign; @@ -1009,6 +1010,7 @@ ORDER BY twavg(velocity) desc; Title → Average Flight Speed + Show → All values @@ -1167,52 +1169,41 @@ WITH -- The flight_traj_time_slice CTE is clipping all the temporal columns -- to the user specified time-range. flight_traj_time_slice (icao24, callsign, time_slice_trip, time_slice_geoaltitude, - time_slice_vertrate) AS - (SELECT icao24, - callsign, - atPeriod(trip, period '[2020-06-01 03:00:00, 2020-06-01 20:30:00)'), - atPeriod(geoaltitude, period '[2020-06-01 03:00:00, 2020-06-01 20:30:00)'), - atPeriod(vertrate, - period '[2020-06-01 03:00:00, 2020-06-01 20:30:00)') - FROM flight_traj_sample TABLESAMPLE SYSTEM (20)), - +time_slice_vertrate) AS + (SELECT icao24, callsign, + atTime(trip, tstzspan '[2020-06-01 02:35:00, 2020-06-01 02:55:00)'), -- I changed the dates to fit my data, you should do the same!! + atTime(geoaltitude, tstzspan '[2020-06-01 02:35:00, 2020-06-01 02:55:00)'), + atTime(vertrate, tstzspan '[2020-06-01 02:35:00, 2020-06-01 02:55:00)') + FROM flight_traj_sample TABLESAMPLE SYSTEM (20)), -- There are 3 things happening in the flight_traj_time_slice_ascent CTE: -- 1. atRange: Clips the temporal data to create ranges where the vertrate --- was between '[1, 20]'. This vertrate means an aircraft was ascending. +-- was between '[1, 20]'. This vertrate means an aircraft was ascending. -- 2. sequenceN: Selects the first sequence from the generated sequences. --- This first sequence is takeoff and eliminates mid-flight ascents. +-- This first sequence is takeoff and eliminates mid-flight ascents. -- 3. atPeriod: Returns the period of the first sequence. flight_traj_time_slice_ascent(icao24, callsign, ascending_trip, ascending_geoaltitude, - ascending_vertrate) AS - (SELECT icao24, - callsign, - atPeriod(time_slice_trip, - period(sequenceN( - atRange(time_slice_vertrate, floatrange '[1,20]'), 1))), - atPeriod(time_slice_geoaltitude, - period(sequenceN(atRange(time_slice_vertrate, floatrange '[1,20]'), - 1))), - atPeriod(time_slice_vertrate, - period(sequenceN(atRange(time_slice_vertrate, floatrange '[1,20]'), - 1))) - FROM flight_traj_time_slice), - +ascending_vertrate) AS + (SELECT icao24, callsign, + atTime(time_slice_trip, sequenceN(atValues(time_slice_vertrate, floatspan '[1,200]'), 1)::tstzspan), + atTime(time_slice_geoaltitude, + sequenceN(atValues(time_slice_vertrate, floatspan '[1,20]'),1)::tstzspan), + atTime(time_slice_vertrate, + sequenceN(atValues(time_slice_vertrate, floatspan '[1,20]'),1)::tstzspan) + FROM flight_traj_time_slice), -- The final_output CTE uses unnest to unpack the temporal data into rows for -- visualization in Grafana. Each row will contain a latitude, longitude and the altitude -- and vertrate at those locations. final_output AS - (SELECT icao24, - callsign, - getValue(unnest(instants(ascending_geoaltitude))) AS geoaltitude, - getValue(unnest(instants(ascending_vertrate))) AS vertrate, - ST_X(getValue(unnest(instants(ascending_trip)))) AS lon, - ST_Y(getValue(unnest(instants(ascending_trip)))) AS lat - FROM flight_traj_time_slice_ascent) - + (SELECT icao24, callsign, + getValue(unnest(instants(ascending_geoaltitude))) AS geoaltitude, + getValue(unnest(instants(ascending_vertrate))) AS vertrate, + ST_X(getValue(unnest(instants(ascending_trip)))) AS lon, + ST_Y(getValue(unnest(instants(ascending_trip)))) AS lat + FROM flight_traj_time_slice_ascent) SELECT * FROM final_output WHERE vertrate IS NOT NULL - AND geoaltitude IS NOT NULL; +AND geoaltitude IS NOT NULL; Tips for QGIS visualization: @@ -1320,7 +1311,7 @@ WHERE vertrate IS NOT NULL - We will also add a manual override (top right of panel options, beside "All") to limit the minimum value of vertebrate. This will make all values + We will also add a manual override (top right of panel options, beside "All") to limit the minimum value of vertrate. This will make all values below the minimum the same color, making larger values more obvious. This can be used to quickly pinpoint locations where a large rate of ascent existed. diff --git a/doc/GPX.xml b/doc/GPX.xml index fa5ae64..0c1df6a 100644 --- a/doc/GPX.xml +++ b/doc/GPX.xml @@ -53,7 +53,7 @@ creator="Example creator"> The following Python program called gpx_to_csv.py uses expat, a stream-oriented XML parser library, to convert the above GPX file in CSV format. - + import sys import xml.parsers.expat @@ -84,7 +84,7 @@ p.ParseFile(sys.stdin.buffer) This Python program can be executed as follows. - + python3 gpx_to_csv.py < example.gpx > example.csv The resulting CSV file is given next. @@ -96,8 +96,8 @@ lon,lat,time 31.461996,30.039801,2015-02-09T08:10:58.00Z ... - The above CSV file can be loaded into MobilityDB as follows. - + The above CSV file can be loaded into MobilityDB as follows. If the command COPY throws a permission error, you can instead use the \copy command of psql to import the CSV file. + DROP TABLE IF EXISTS trips_input; CREATE TABLE trips_input ( date date, diff --git a/doc/GTFS.xml b/doc/GTFS.xml index e432d7f..22da9f8 100644 --- a/doc/GTFS.xml +++ b/doc/GTFS.xml @@ -54,14 +54,14 @@ We decompress the file with the data into a directory. This can be done using the command. - + unzip gtfs_data.zip We suppose in the following that the directory used is as follows /home/gtfs_tutorial/. - We create the tables to be loaded with the data in the CSV files as follows. - + First, you need to create a new database, gtfs, and issue the create extension command: CREATE EXTENSION mobilityDB CASCADE;. We then create the tables to be loaded with the data in the CSV files as follows. + CREATE TABLE agency ( agency_id text DEFAULT '', agency_name text DEFAULT NULL, @@ -204,8 +204,9 @@ INSERT INTO pickup_dropoff_types (type_id, description) VALUES - We can load the CSV files into the corresponding tables as follows. - + We can load the CSV files into the corresponding tables as follows. As in the previous examples, if you experience a permission + denied error, you can use the \copy command from the psql shell instead of the COPY command. + COPY calendar(service_id,monday,tuesday,wednesday,thursday,friday,saturday,sunday, start_date,end_date) FROM '/home/gtfs_tutorial/calendar.txt' DELIMITER ',' CSV HEADER; COPY calendar_dates(service_id,date,exception_type) @@ -229,7 +230,7 @@ location_type,parent_station) FROM '/home/gtfs_tutorial/stops.txt' DELIMITER ',' CSV HEADER; Finally, we create the geometries for routes and stops as follows. - + INSERT INTO shape_geoms SELECT shape_id, ST_MakeLine(array_agg( ST_SetSRID(ST_MakePoint(shape_pt_lon, shape_pt_lat),4326) ORDER BY shape_pt_sequence)) @@ -253,7 +254,7 @@ SET stop_geom = ST_SetSRID(ST_MakePoint(stop_lon, stop_lat),4326); Transforming GTFS Data for MobilityDB We start by creating a table that contains couples of service_id and date defining the dates at which a service is provided. - + DROP TABLE IF EXISTS service_dates; CREATE TABLE service_dates AS ( SELECT service_id, date_trunc('day', d)::date AS date @@ -281,7 +282,7 @@ WHERE exception_type = 1 AND start_date <= date AND date <= end_date We now create a table trip_stops that determines the stops for each trip. - + DROP TABLE IF EXISTS trip_stops; CREATE TABLE trip_stops ( trip_id text, @@ -315,7 +316,7 @@ WHERE t.shape_id = g.shape_id AND t.stop_id = s.stop_id; We now create a table trip_segs that defines the segments between two consecutive stops of a trip. - + DROP TABLE IF EXISTS trip_segs; CREATE TABLE trip_segs ( trip_id text, @@ -359,7 +360,7 @@ SET seg_length = ST_Length(seg_geom), no_points = ST_NumPoints(seg_geom); The geometry of a segment is a linestring containing multiple points. From the previous table we know at which time the trip arrived at the first point and at the last point of the segment. To determine at which time the trip arrived at the intermediate points of the segments, we create a table trip_points that contains all the points composing the geometry of a segment. - + DROP TABLE IF EXISTS trip_points; CREATE TABLE trip_points ( trip_id text, @@ -406,7 +407,7 @@ FROM temp3; Our last temporary table trips_input contains the data in the format that can be used for creating the MobilityDB trips. - + DROP TABLE IF EXISTS trips_input; CREATE TABLE trips_input ( trip_id text, @@ -428,21 +429,21 @@ ON t.service_id = s.service_id; Finally, table trips_mdb contains the MobilityDB trips. - + DROP TABLE IF EXISTS trips_mdb; CREATE TABLE trips_mdb ( - trip_id text NOT NULL, - route_id text NOT NULL, - date date NOT NULL, - trip tgeompoint, - PRIMARY KEY (trip_id, date) + trip_id text NOT NULL, + service_id text NOT NULL, + route_id text NOT NULL, + date date NOT NULL, + trip tgeompoint, + PRIMARY KEY (trip_id, date) ); -INSERT INTO trips_mdb(trip_id, route_id, date, trip) -SELECT trip_id, route_id, date, - tgeompoint_seq(array_agg(tgeompoint_inst(point_geom, t) ORDER BY T)) +INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip) +SELECT trip_id, service_id, route_id, date, tgeompoint_seq(array_agg(tgeompoint_inst(point_geom, t) ORDER BY T)) FROM trips_input -GROUP BY trip_id, route_id, date; +GROUP BY trip_id, service_id, route_id, date; INSERT INTO trips_mdb(trip_id, service_id, route_id, date, trip) SELECT trip_id, route_id, t.service_id, d.date, diff --git a/doc/images/points.png b/doc/images/points.png index d1dbc94..ad9f609 100644 Binary files a/doc/images/points.png and b/doc/images/points.png differ diff --git a/doc/images/trajs.png b/doc/images/trajs.png index 6dbfca3..8526a28 100644 Binary files a/doc/images/trajs.png and b/doc/images/trajs.png differ diff --git a/doc/images/trajsFiltered.png b/doc/images/trajsFiltered.png index 4330162..af99d33 100644 Binary files a/doc/images/trajsFiltered.png and b/doc/images/trajsFiltered.png differ diff --git a/doc/images/trajsWrongAzimuth.png b/doc/images/trajsWrongAzimuth.png index d795e8e..7212980 100644 Binary files a/doc/images/trajsWrongAzimuth.png and b/doc/images/trajsWrongAzimuth.png differ diff --git a/doc/images/trajsWrongSpeed.png b/doc/images/trajsWrongSpeed.png index 02fd424..175487a 100644 Binary files a/doc/images/trajsWrongSpeed.png and b/doc/images/trajsWrongSpeed.png differ diff --git a/doc/location_history.xml b/doc/location_history.xml index 4699e81..8350559 100644 --- a/doc/location_history.xml +++ b/doc/location_history.xml @@ -34,7 +34,7 @@ If we want to load location information into MobilityDB we only need the fields longitudeE7, latitudeE7, and timestampMs. To convert the original JSON file into a CSV file containing only these fields we can use jq, a command-line JSON processor. The following command - + cat location_history.json | jq -r ".locations[] | {latitudeE7, longitudeE7, timestampMs} | [.latitudeE7, .longitudeE7, .timestampMs] | @csv" > location_history.csv @@ -51,14 +51,16 @@ cat location_history.json | jq -r ".locations[] | {latitudeE7, longitudeE7, time The above command works well for files of moderate size since by default jq loads the whole input text in memory. For very large files you may consider the --stream option of jq, which parses input texts in a streaming fashion. - Now we can import the generated CSV file into PostgreSQL as follows. - + Now we can import the generated CSV file into PostgreSQL as follows. If the COPY command throws a permission + error, you can instead use the \copy command of psql to import the CSV file. + DROP TABLE IF EXISTS location_history; + CREATE TABLE location_history ( -latitudeE7 float, -longitudeE7 float, -timestampMs bigint, -date date + latitudeE7 float, + longitudeE7 float, + timestampMs bigint, + date date ); COPY location_history(latitudeE7, longitudeE7, timestampMs) FROM @@ -72,19 +74,20 @@ SET date = date(to_timestamp(timestampMs / 1000.0)::timestamptz); We can now transform this data into MobilityDB trips as follows. - + DROP TABLE IF EXISTS locations_mdb; + CREATE TABLE locations_mdb ( -date date NOT NULL, -trip tgeompoint, -trajectory geometry, -PRIMARY KEY (date) + date date NOT NULL, + trip tgeompoint, + trajectory geometry, + PRIMARY KEY (date) ); INSERT INTO locations_mdb(date, trip) SELECT date, tgeompoint_seq(array_agg(tgeompoint_inst( -ST_SetSRID(ST_Point(longitudeE7/1e7, latitudeE7/1e7),4326), -to_timestamp(timestampMs / 1000.0)::timestamptz) ORDER BY timestampMs)) + ST_SetSRID(ST_Point(longitudeE7/1e7, latitudeE7/1e7),4326), + to_timestamp(timestampMs / 1000.0)::timestamptz) ORDER BY timestampMs)) FROM location_history GROUP BY date; diff --git a/doc/mobilitydb-workshop.xml b/doc/mobilitydb-workshop.xml index 7889be9..73805fa 100644 --- a/doc/mobilitydb-workshop.xml +++ b/doc/mobilitydb-workshop.xml @@ -32,6 +32,32 @@ + + Mohammad Ismail + Tirmizi + + Université libre de Bruxelles, Belgium + + + + + Adam + Broniewski + + Université libre de Bruxelles, Belgium + + + + + Jose Antonio + Lorencio Abril + + Université libre de Bruxelles, Belgium + + Update contents to MobilityDB 1.1. + + + Every module in this workshop illustrates a usage scenario of MobilityDB. The data sets and the tools are described inside each of the modules. Eventually, additional modules will be added to discover more MobilityDB features.