From 7b6998c8f93a2df8ed8413ed338ccd83de4df92b Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Thu, 4 Aug 2022 16:39:28 +0100 Subject: [PATCH 01/12] Initial create grid of chips --- .gitignore | 2 + pyveg/src/grid_creator.py | 87 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+) create mode 100644 pyveg/src/grid_creator.py diff --git a/.gitignore b/.gitignore index e5493394..4efb1413 100644 --- a/.gitignore +++ b/.gitignore @@ -120,3 +120,5 @@ pyveg/testdata/test_ne_england/ # Virtual Environments env/ +birmingham-test__2022-*/ +Sentinel2-52.50N-1.90W-birmingham-reproject-test__2022-*/ diff --git a/pyveg/src/grid_creator.py b/pyveg/src/grid_creator.py new file mode 100644 index 00000000..803307de --- /dev/null +++ b/pyveg/src/grid_creator.py @@ -0,0 +1,87 @@ +from datetime import datetime, timedelta +from itertools import product + +import geopandas +import matplotlib +import pandas +import requests +from icecream import ic +from shapely.geometry import box + +uk_boundaries_reference_url = "https://github.com/wmgeolab/geoBoundaries/blob/main/releaseData/gbOpen/GBR/ADM0/geoBoundaries-GBR-ADM0_simplified.geojson" +uk_json = "geoBoundaries-GBR-ADM0_simplified.geojson" + +uk_bbox = [0, 0, 700000, 1300000] + + +def get_uk(): + uk_boundaries_url = "https://raw.githubusercontent.com/wmgeolab/geoBoundaries/main/releaseData/gbOpen/GBR/ADM0/geoBoundaries-GBR-ADM0_simplified.geojson" + uk_gdf = geopandas.read_file(uk_boundaries_url) + return uk_gdf + + +def create_grid_of_chips(chip_px_width: int): + # Hardcode the assumption that one pixel==10 metres + # chipe_dimension is the real-world chip size in metres + chip_dimension = chip_px_width * 10 + + # First create a flat dataframe with attributes + # - chip_id + # - x_lower_left + # - y_lower_left + x_range = range(uk_bbox[0], uk_bbox[2], chip_dimension) + y_range = range(uk_bbox[1], uk_bbox[3], chip_dimension) + + ids = [] + xs = [] + ys = [] + + for x, y in product(x_range, y_range): + ids.append(get_chip_id(x, y, chip_dimension)) + xs.append(x) + ys.append(y) + + df = pandas.DataFrame.from_dict( + {"chip_id": ids, "x_lower_left": xs, "y_lower_left": ys} + ) + + # Now create the geometeries for each cell and convert to a geodataframe + def _get_box(row): + x = row["x_lower_left"] + y = row["y_lower_left"] + return box(x, y, x + chip_dimension, y + chip_dimension) + + df["geometry"] = df.apply(_get_box, axis=1) + gdf = geopandas.GeoDataFrame(df, geometry="geometry", crs="EPSG:27700") + + return gdf + + +def get_chip_id(x, y, chip_dimension): + """ + Create a chip_id which is unique irrespective of chip_size and meaningful + """ + return "{:0>6}_{:0>7}_{:0>3}".format(x, y, chip_dimension) + + +if __name__ == "__main__": + chip_px_width = 32 + + start_time = datetime.now() + + chips_gdf = create_grid_of_chips(chip_px_width) + + stage1_time = datetime.now() + ic(stage1_time - start_time) + ic(len(chips_gdf)) + + layer_name = f"chips_{chip_px_width:0>2}" + chips_gdf.to_parquet(f"{layer_name}.parquet") + + stage2_time = datetime.now() + ic(stage2_time - stage1_time) + + chips_gdf.to_file("chips.gpkg", layer=layer_name) + stage3_time = datetime.now() + ic(stage3_time - stage2_time) + ic(stage3_time - start_time) From ceacbc1bbc37ed1c9fc51f6d902a06792a9f6db8 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Wed, 10 Aug 2022 11:20:05 +0100 Subject: [PATCH 02/12] Create grids for chips and images of varying dimension --- pyveg/src/grid_creator.py | 81 +++++++++++++++++++++++++++++++-------- 1 file changed, 66 insertions(+), 15 deletions(-) diff --git a/pyveg/src/grid_creator.py b/pyveg/src/grid_creator.py index 803307de..51ff609c 100644 --- a/pyveg/src/grid_creator.py +++ b/pyveg/src/grid_creator.py @@ -7,6 +7,7 @@ import requests from icecream import ic from shapely.geometry import box +from shapely.ops import polygonize_full uk_boundaries_reference_url = "https://github.com/wmgeolab/geoBoundaries/blob/main/releaseData/gbOpen/GBR/ADM0/geoBoundaries-GBR-ADM0_simplified.geojson" uk_json = "geoBoundaries-GBR-ADM0_simplified.geojson" @@ -64,24 +65,74 @@ def get_chip_id(x, y, chip_dimension): return "{:0>6}_{:0>7}_{:0>3}".format(x, y, chip_dimension) -if __name__ == "__main__": - chip_px_width = 32 +def coastline_to_coastpoly(path_to_coastline=None): + if path_to_coastline is None: + path_to_coastline = "zip://strtgi_essh_gb.zip!strtgi_essh_gb/data/coastline.shp" + + coastline = geopandas.read_file(path_to_coastline) + + all_lines = coastline["geometry"].to_list() + ic(len(all_lines)) + result, dangles, cuts, invalids = polygonize_full(all_lines) + ic(len(result.geoms)) + ic(len(result.geoms)) + ic(len(dangles.geoms)) + ic(len(cuts.geoms)) + ic(len(invalids.geoms)) - start_time = datetime.now() + ic(result) + ic(dangles) + ic(cuts) + ic(invalids) - chips_gdf = create_grid_of_chips(chip_px_width) + polys_gdf = geopandas.GeoDataFrame( + {"geometry": result.geoms, "id": True}, crs="EPSG:27700" + ) + ic(polys_gdf) + + polys_gdf.to_file("coast_polys.gpkg", layer="all_polys") + polys_gdf = polys_gdf.dissolve() + polys_gdf.to_file("coast_polys.gpkg", layer="polys_disolved") - stage1_time = datetime.now() - ic(stage1_time - start_time) - ic(len(chips_gdf)) + # gdf['inside'] = gdf['geometry'].apply(lambda shp: shp.intersects(gdfBuffer.dissolve('LAND').iloc[0]['geometry'])) - layer_name = f"chips_{chip_px_width:0>2}" - chips_gdf.to_parquet(f"{layer_name}.parquet") + return polys_gdf - stage2_time = datetime.now() - ic(stage2_time - stage1_time) - chips_gdf.to_file("chips.gpkg", layer=layer_name) - stage3_time = datetime.now() - ic(stage3_time - stage2_time) - ic(stage3_time - start_time) +if __name__ == "__main__": + polys_gdf = coastline_to_coastpoly() + # chip_px_width = 32 + options = { + 16: "chips_16", + 32: "chips_32", + 32 * 16: "images_0512", + 32 * 32: "images_1024", + } + options = {32 * 32: "images_1024", 32 * 16: "images_0512"} + + all_grids = {} + + for chip_px_width, layer_name in options.items(): + start_time = datetime.now() + + chips_gdf = create_grid_of_chips(chip_px_width) + + stage1_time = datetime.now() + ic(stage1_time - start_time) + ic(len(chips_gdf)) + + coast = polys_gdf["geometry"][0] + ic(type(coast)) + chips_gdf["on_land"] = chips_gdf["geometry"].intersects(coast) + all_grids[layer_name] = chips_gdf + + # layer_name = f"chips_{chip_px_width:0>2}" + chips_gdf.to_parquet(f"{layer_name}.parquet") + + stage2_time = datetime.now() + ic(stage2_time - stage1_time) + + chips_gdf.to_file("chips.gpkg", layer=layer_name) + stage3_time = datetime.now() + ic(stage3_time - stage2_time) + ic(stage3_time - start_time) From f2e91c88ae69910f01b0d1e5f7759fbee18ae93b Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Wed, 10 Aug 2022 12:23:58 +0100 Subject: [PATCH 03/12] add grid_id convertor function --- pyveg/src/grid_creator.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/pyveg/src/grid_creator.py b/pyveg/src/grid_creator.py index 51ff609c..57721d26 100644 --- a/pyveg/src/grid_creator.py +++ b/pyveg/src/grid_creator.py @@ -1,3 +1,4 @@ +import re from datetime import datetime, timedelta from itertools import product @@ -65,6 +66,24 @@ def get_chip_id(x, y, chip_dimension): return "{:0>6}_{:0>7}_{:0>3}".format(x, y, chip_dimension) +def get_image_id_from_chip(chip_id: str, chip_dimension: int): + result = re.match("(?P\d{6})_(?P\d{7})_(?P\d+)", chip_id) + x_source = int(result.group("x")) + y_source = int(result.group("y")) + d_source = int(result.group("d")) + ic(x_source, y_source) + + if d_source > chip_dimension: + raise ValueError( + "Cannot derive chip_id of a chip which is smaller than the source id" + ) + + x_dest = x_source - (x_source % chip_dimension) + y_dest = y_source - (y_source % chip_dimension) + ic(x_dest, y_dest) + return get_chip_id(x_dest, y_dest, chip_dimension) + + def coastline_to_coastpoly(path_to_coastline=None): if path_to_coastline is None: path_to_coastline = "zip://strtgi_essh_gb.zip!strtgi_essh_gb/data/coastline.shp" @@ -100,6 +119,17 @@ def coastline_to_coastpoly(path_to_coastline=None): if __name__ == "__main__": + + # ic(get_image_id_from_chip("001536_0002048_032", 512)) + # ic(get_image_id_from_chip("001536_0002048_016", 512)) + # ic(get_image_id_from_chip("001536_0002048_032", 1024)) + # ic(get_image_id_from_chip("001536_0002048_016", 1024)) + # ic(get_image_id_from_chip("123456_1234567_032", 1024)) + # ic(get_image_id_from_chip("123456_1234567_016", 1024)) + + # # This case should fail + # ic(get_image_id_from_chip("123456_1234567_032", 16)) + polys_gdf = coastline_to_coastpoly() # chip_px_width = 32 options = { @@ -108,7 +138,7 @@ def coastline_to_coastpoly(path_to_coastline=None): 32 * 16: "images_0512", 32 * 32: "images_1024", } - options = {32 * 32: "images_1024", 32 * 16: "images_0512"} + # options = {32 * 32: "images_1024", 32 * 16: "images_0512"} all_grids = {} From 766d4ce5cd3e705abc81d0557739def7b0a50ba8 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Wed, 10 Aug 2022 19:59:34 +0100 Subject: [PATCH 04/12] cherry pick from #29 --- .gitignore | 1 + environment.yml | 3 +- pyveg/configs/config_liverpool_bng_example.py | 40 ++++++++++++ pyveg/requirements.txt | 2 +- pyveg/src/download_modules.py | 62 ++++++++++++++----- pyveg/src/gee_interface.py | 8 ++- pyveg/src/image_utils.py | 2 +- pyveg/src/processor_modules.py | 2 + 8 files changed, 98 insertions(+), 22 deletions(-) create mode 100644 pyveg/configs/config_liverpool_bng_example.py diff --git a/.gitignore b/.gitignore index 4efb1413..0ed80b20 100644 --- a/.gitignore +++ b/.gitignore @@ -122,3 +122,4 @@ pyveg/testdata/test_ne_england/ env/ birmingham-test__2022-*/ Sentinel2-52.50N-1.90W-birmingham-reproject-test__2022-*/ +pyveg/testdata/Sentinel2/tmp_png/* diff --git a/environment.yml b/environment.yml index 78be7211..0eaec879 100644 --- a/environment.yml +++ b/environment.yml @@ -1,9 +1,10 @@ -name: gee_pipeline +name: gee_pipeline2 channels: - defaults - conda-forge dependencies: - python=3.7 - earthengine-api + - geetools - rasterio - coverage diff --git a/pyveg/configs/config_liverpool_bng_example.py b/pyveg/configs/config_liverpool_bng_example.py new file mode 100644 index 00000000..d8b193b7 --- /dev/null +++ b/pyveg/configs/config_liverpool_bng_example.py @@ -0,0 +1,40 @@ +from pyveg.configs.collections import data_collections +from pyveg.coordinates import coordinate_store + +name = "liverpool" + +# Define location to save all outputs +output_location = "liverpool-bng" +#output_location_type = "azure" +output_location_type = "local" + +# parse selection. Note (long, lat) GEE convention. +#entry = coordinate_store.loc[coordinate_id] +# modify this line to set coords based on entries in `coordinates.py` +#coordinate_id = "00" + +# parse selection. Note (long, lat) GEE convention. +#entry = coordinate_store.loc[coordinate_id] +#coordinates = (entry.longitude, entry.latitude) + +# bounds = [ 327771, 384988, 339052, 395656 ] +bounds = [297566, 345093, 387814, 430437] +date_range = ["2016-05-01", "2016-08-01"] + +# From the dictionary entries in data_collections.py, which shall we use +# (these will become "Sequences") +collections_to_use = ["Sentinel2"] + +# Dictionary defining what Modules should run in each Sequence. + +modules_to_use = { + "Sentinel2": [ + "VegetationDownloader", + "VegetationImageProcessor", + ], +} + +# The following demonstrates how parameters can be set for individual Modules or Sequences: +special_config = {"Sentinel2": {"time_per_point": "3m"} # this is a whole Sequence + # # and another Module + } diff --git a/pyveg/requirements.txt b/pyveg/requirements.txt index 9f24a2a1..0fd45e14 100644 --- a/pyveg/requirements.txt +++ b/pyveg/requirements.txt @@ -12,7 +12,7 @@ pytest requests dateparser python-igraph -pandas==1.1.1 +pandas geopandas earthengine-api geetools diff --git a/pyveg/src/download_modules.py b/pyveg/src/download_modules.py index ee4e27c7..a145fe76 100644 --- a/pyveg/src/download_modules.py +++ b/pyveg/src/download_modules.py @@ -3,28 +3,35 @@ """ import logging -import os -import subprocess + +# import os +# import subprocess import tempfile -from datetime import datetime, timedelta -import cv2 as cv -import dateparser +# import cv2 as cv +# import dateparser import ee -import requests + +# import requests from geetools import cloud_mask +from icecream import ic + +from pyveg.src.coordinate_utils import get_coords, get_region_string +from pyveg.src.date_utils import slice_time_period -from pyveg.src.coordinate_utils import get_region_string -from pyveg.src.date_utils import ( - find_mid_period, - get_num_n_day_slices, - slice_time_period, - slice_time_period_into_n, -) +# from pyveg.src.date_utils import ( +# find_mid_period, +# get_num_n_day_slices, +# slice_time_period, +# slice_time_period_into_n, +# ) from pyveg.src.file_utils import download_and_unzip from pyveg.src.gee_interface import add_NDVI, apply_mask_cloud from pyveg.src.pyveg_pipeline import BaseModule, logger +# from datetime import datetime, timedelta + + ee.Initialize() # silence google API WARNING @@ -126,11 +133,30 @@ def prep_data(self, date_range): ------- url_list: a list of URLs from which zipfiles can be downloaded from GEE. """ - region = get_region_string(self.coords, self.region_size) + # region = get_region_string(self.coords, self.region_size) + # start_date, end_date = date_range + + # image_coll = ee.ImageCollection(self.collection_name) + # geom = ee.Geometry.Point(self.coords) + region = get_region_string(self.bounds) + ic(self.bounds) + ic(region) start_date, end_date = date_range image_coll = ee.ImageCollection(self.collection_name) - geom = ee.Geometry.Point(self.coords) + geom = ee.Geometry.Point(ic(get_coords(self.bounds)), proj="EPSG:27700") + ll_point = ee.Geometry.Point( + (self.bounds[0], self.bounds[1]), proj="EPSG:27700" + ) + tr_point = ee.Geometry.Point( + (self.bounds[2], self.bounds[3]), proj="EPSG:27700" + ) + # geom = ee.Geometry.Rectangle(self.bounds, proj= "EPSG:27700") + geom = ee.Geometry.Rectangle( + coords=(ll_point, tr_point), proj="EPSG:27700", evenOdd=False + ) + # (ic(get_coords(self.bounds)), proj= "EPSG:27700") + ic(geom) dataset = image_coll.filterBounds(geom).filterDate(start_date, end_date) dataset_size = dataset.size().getInfo() @@ -143,9 +169,13 @@ def prep_data(self, date_range): image_list = self.prep_images(dataset) url_list = [] for image in image_list: + ic(image) # get a URL from which we can download the resulting data try: - url = image.getDownloadURL({"region": region, "scale": self.scale}) + url = image.getDownloadURL( + {"region": geom, "scale": self.scale, "crs": "EPSG:27700"} + # {"region": region, "crs": "EPSG:27700"} + ) url_list.append(url) except Exception as e: logger.info("Unable to get URL: {}".format(e)) diff --git a/pyveg/src/gee_interface.py b/pyveg/src/gee_interface.py index e58c5797..708138b2 100644 --- a/pyveg/src/gee_interface.py +++ b/pyveg/src/gee_interface.py @@ -3,15 +3,17 @@ """ import os -import shutil -from datetime import datetime -import cv2 as cv +# import cv2 as cv import ee from geetools import cloud_mask from .file_utils import download_and_unzip +# import shutil +# from datetime import datetime + + ee.Initialize() diff --git a/pyveg/src/image_utils.py b/pyveg/src/image_utils.py index 1a5637c2..b6aa7606 100644 --- a/pyveg/src/image_utils.py +++ b/pyveg/src/image_utils.py @@ -6,7 +6,7 @@ ``` """ -import json +# import json import os import sys diff --git a/pyveg/src/processor_modules.py b/pyveg/src/processor_modules.py index 9ae72033..786a193a 100644 --- a/pyveg/src/processor_modules.py +++ b/pyveg/src/processor_modules.py @@ -13,6 +13,8 @@ import cv2 as cv import numpy as np + +# import rasterio from PIL import Image from pyveg.src import azure_utils, batch_utils From bfb146a1b52a0a4e420d34bdd7143451e6328c72 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 12 Aug 2022 09:19:51 +0100 Subject: [PATCH 05/12] Make `create_chip_grid` a standalone script --- pyveg/scripts/create_chip_grid.py | 118 ++++++++++++++++++ pyveg/src/grid_creator.py | 201 ++++++++++++++++-------------- pyveg/tests/test_grid_creator.py | 55 ++++++++ 3 files changed, 279 insertions(+), 95 deletions(-) create mode 100644 pyveg/scripts/create_chip_grid.py create mode 100644 pyveg/tests/test_grid_creator.py diff --git a/pyveg/scripts/create_chip_grid.py b/pyveg/scripts/create_chip_grid.py new file mode 100644 index 00000000..ec8e389d --- /dev/null +++ b/pyveg/scripts/create_chip_grid.py @@ -0,0 +1,118 @@ +from datetime import datetime + +import fiona +from icecream import ic + +from pyveg.src.grid_creator import ( + coastline_to_poly, + create_grid_of_chips, + create_raster_of_chips, + get_parent_image_id, +) + +# A collection of config options +config = { + "bounding_box": [0, 0, 700000, 1300000], + "target_crs": "EPSG:27700", + "path_to_coastline": "zip://../chips/strtgi_essh_gb.zip!strtgi_essh_gb/data/coastline.shp", + "output_path": "./data", + "pixel_scale": 10, +} + + +""" +The `output_options` dict should be specified in the format + : + +One output file name will be created for each key/values pair +""" +# Production values +output_options = { + 32: "chips_32", + 32 * 16: "images_0512", + 32 * 32: "images_1024", +} + +# Testing values +# output_options = { +# 32 * 16: "images_0512", +# 32 * 32: "images_1024", +# } + + +def get_larger_chip_id_func(chip_dimension: int): + def apply_larger_chip_id(row): + return get_parent_image_id( + child_x=row["x_lower_left"], + child_y=row["y_lower_left"], + parent_chip_dimension=chip_dimension, + ) + + return apply_larger_chip_id + + +if __name__ == "__main__": + + try: + coast = coastline_to_poly( + path_to_coastline=config["path_to_coastline"], crs=config["target_crs"] + ) + except fiona.errors.DriverError: + ic("warning: coastline data not found") + coast = None + + for chip_px_width, layer_name in output_options.items(): + start_time = datetime.now() + ic(start_time) + + # First create the raster + create_raster_of_chips( + chip_px_width=chip_px_width, + pixel_scale=config["pixel_scale"], + target_crs=config["target_crs"], + bounding_box=config["bounding_box"], + base_output_filename=layer_name, + ) + + # Next create the grid as polygons + chips_gdf = create_grid_of_chips( + chip_px_width=chip_px_width, + pixel_scale=config["pixel_scale"], + target_crs=config["target_crs"], + bounding_box=config["bounding_box"], + ) + + stage1_time = datetime.now() + ic(stage1_time - start_time) + + # Add `on_land` column if the coast data is available + if coast: + chips_gdf["on_land"] = chips_gdf["geometry"].intersects(coast) + + stage2_time = datetime.now() + ic(stage2_time - stage1_time) + + # # Now add cross-references to image ids (for different size images) + # for larger_chip_width, larger_name in output_options.items(): + # ic(larger_chip_width, chip_px_width) + # if larger_chip_width > chip_px_width: + # get_larger_chip_id = get_larger_chip_id_func( + # larger_chip_width * config["pixel_scale"] + # ) + # chips_gdf[f"{larger_name}_id"] = chips_gdf.apply( + # get_larger_chip_id, axis=1 + # ) + + # ic(chips_gdf.head(10)) + + # Write to geoparquet file + chips_gdf.to_parquet(f"{layer_name}.parquet") + + stage3_time = datetime.now() + ic(stage3_time - stage2_time) + + # Write to GeoPackage DBN (useful for display in GIS) + # chips_gdf.to_file("chips.gpkg", layer=layer_name) + stage4_time = datetime.now() + ic(stage4_time - stage3_time) + ic(stage4_time - start_time) diff --git a/pyveg/src/grid_creator.py b/pyveg/src/grid_creator.py index 57721d26..141b663d 100644 --- a/pyveg/src/grid_creator.py +++ b/pyveg/src/grid_creator.py @@ -1,38 +1,37 @@ import re -from datetime import datetime, timedelta from itertools import product +from typing import List +from unittest import result import geopandas -import matplotlib +import numpy as np import pandas -import requests from icecream import ic -from shapely.geometry import box +from osgeo import gdal, osr +from shapely.geometry import Polygon, box from shapely.ops import polygonize_full -uk_boundaries_reference_url = "https://github.com/wmgeolab/geoBoundaries/blob/main/releaseData/gbOpen/GBR/ADM0/geoBoundaries-GBR-ADM0_simplified.geojson" -uk_json = "geoBoundaries-GBR-ADM0_simplified.geojson" +chip_search = re.compile("(?P\d{6})_(?P\d{7})_(?P\d+)") -uk_bbox = [0, 0, 700000, 1300000] - -def get_uk(): - uk_boundaries_url = "https://raw.githubusercontent.com/wmgeolab/geoBoundaries/main/releaseData/gbOpen/GBR/ADM0/geoBoundaries-GBR-ADM0_simplified.geojson" - uk_gdf = geopandas.read_file(uk_boundaries_url) - return uk_gdf - - -def create_grid_of_chips(chip_px_width: int): +def create_grid_of_chips( + chip_px_width: int, pixel_scale: int, target_crs: str, bounding_box: List[int] +) -> geopandas.GeoDataFrame: + """ + @param chip_px_width The number of pixels along each edge fo the a chip/image + @param pixel_scale The real-world size of one pixel in the units of the target_crs + @param target_crs The target CRS + """ # Hardcode the assumption that one pixel==10 metres - # chipe_dimension is the real-world chip size in metres - chip_dimension = chip_px_width * 10 + # chip_dimension is the real-world chip size in metres + chip_dimension = chip_px_width * pixel_scale # First create a flat dataframe with attributes # - chip_id # - x_lower_left # - y_lower_left - x_range = range(uk_bbox[0], uk_bbox[2], chip_dimension) - y_range = range(uk_bbox[1], uk_bbox[3], chip_dimension) + x_range = range(bounding_box[0], bounding_box[2], chip_dimension) + y_range = range(bounding_box[1], bounding_box[3], chip_dimension) ids = [] xs = [] @@ -54,115 +53,127 @@ def _get_box(row): return box(x, y, x + chip_dimension, y + chip_dimension) df["geometry"] = df.apply(_get_box, axis=1) - gdf = geopandas.GeoDataFrame(df, geometry="geometry", crs="EPSG:27700") + gdf = geopandas.GeoDataFrame(df, geometry="geometry", crs=target_crs) return gdf -def get_chip_id(x, y, chip_dimension): +def get_chip_id(x: int, y: int, chip_dimension: int) -> str: """ - Create a chip_id which is unique irrespective of chip_size and meaningful + Create a chip_id which is both meaningful and unique irrespective of chip_size. """ return "{:0>6}_{:0>7}_{:0>3}".format(x, y, chip_dimension) -def get_image_id_from_chip(chip_id: str, chip_dimension: int): - result = re.match("(?P\d{6})_(?P\d{7})_(?P\d+)", chip_id) +def get_image_id_from_chip(chip_id: str, chip_dimension: int) -> str: + # result = re.match("(?P\d{6})_(?P\d{7})_(?P\d+)", chip_id) + result = chip_search.match(chip_id) x_source = int(result.group("x")) y_source = int(result.group("y")) d_source = int(result.group("d")) - ic(x_source, y_source) if d_source > chip_dimension: + # ic(d_source, chip_dimension) raise ValueError( "Cannot derive chip_id of a chip which is smaller than the source id" ) - x_dest = x_source - (x_source % chip_dimension) - y_dest = y_source - (y_source % chip_dimension) - ic(x_dest, y_dest) - return get_chip_id(x_dest, y_dest, chip_dimension) + return get_parent_image_id( + child_x=x_source, child_y=y_source, parent_chip_dimension=chip_dimension + ) + + +def get_parent_image_id(child_x: int, child_y: int, parent_chip_dimension: int) -> str: + x_dest = child_x - (child_x % parent_chip_dimension) + y_dest = child_y - (child_y % parent_chip_dimension) + + return get_chip_id(x_dest, y_dest, parent_chip_dimension) + + +def coastline_to_poly(path_to_coastline: str, crs: str) -> Polygon: -def coastline_to_coastpoly(path_to_coastline=None): if path_to_coastline is None: path_to_coastline = "zip://strtgi_essh_gb.zip!strtgi_essh_gb/data/coastline.shp" coastline = geopandas.read_file(path_to_coastline) all_lines = coastline["geometry"].to_list() - ic(len(all_lines)) result, dangles, cuts, invalids = polygonize_full(all_lines) - ic(len(result.geoms)) - ic(len(result.geoms)) - ic(len(dangles.geoms)) - ic(len(cuts.geoms)) - ic(len(invalids.geoms)) - - ic(result) - ic(dangles) - ic(cuts) - ic(invalids) - - polys_gdf = geopandas.GeoDataFrame( - {"geometry": result.geoms, "id": True}, crs="EPSG:27700" - ) - ic(polys_gdf) - - polys_gdf.to_file("coast_polys.gpkg", layer="all_polys") - polys_gdf = polys_gdf.dissolve() - polys_gdf.to_file("coast_polys.gpkg", layer="polys_disolved") - - # gdf['inside'] = gdf['geometry'].apply(lambda shp: shp.intersects(gdfBuffer.dissolve('LAND').iloc[0]['geometry'])) - return polys_gdf + # Check that there where no errors + assert len(dangles.geoms) == 0 + assert len(cuts.geoms) == 0 + assert len(invalids.geoms) == 0 + polys_gdf = geopandas.GeoDataFrame({"geometry": result.geoms, "id": True}, crs=crs) -if __name__ == "__main__": + polys_gdf = polys_gdf.dissolve() + coast = polys_gdf["geometry"][0] - # ic(get_image_id_from_chip("001536_0002048_032", 512)) - # ic(get_image_id_from_chip("001536_0002048_016", 512)) - # ic(get_image_id_from_chip("001536_0002048_032", 1024)) - # ic(get_image_id_from_chip("001536_0002048_016", 1024)) - # ic(get_image_id_from_chip("123456_1234567_032", 1024)) - # ic(get_image_id_from_chip("123456_1234567_016", 1024)) + return coast - # # This case should fail - # ic(get_image_id_from_chip("123456_1234567_032", 16)) - polys_gdf = coastline_to_coastpoly() +def create_raster_of_chips( + chip_px_width: int, + pixel_scale: int, + target_crs: str, + bounding_box: List[int], + base_output_filename: str, +): + """ + Using https://stackoverflow.com/a/33950009 + """ + # Choose some Geographic Transform + # uk_bbox = [0, 0, 700000, 1300000] # chip_px_width = 32 - options = { - 16: "chips_16", - 32: "chips_32", - 32 * 16: "images_0512", - 32 * 32: "images_1024", - } - # options = {32 * 32: "images_1024", 32 * 16: "images_0512"} - - all_grids = {} - - for chip_px_width, layer_name in options.items(): - start_time = datetime.now() - - chips_gdf = create_grid_of_chips(chip_px_width) - - stage1_time = datetime.now() - ic(stage1_time - start_time) - ic(len(chips_gdf)) + chip_dimension = chip_px_width * pixel_scale - coast = polys_gdf["geometry"][0] - ic(type(coast)) - chips_gdf["on_land"] = chips_gdf["geometry"].intersects(coast) - all_grids[layer_name] = chips_gdf - - # layer_name = f"chips_{chip_px_width:0>2}" - chips_gdf.to_parquet(f"{layer_name}.parquet") - - stage2_time = datetime.now() - ic(stage2_time - stage1_time) + # First create a flat dataframe with attributes + # - chip_id + # - x_lower_left + # - y_lower_left + x_range = range(bounding_box[0], bounding_box[2], chip_dimension) + y_range = range(bounding_box[1], bounding_box[3], chip_dimension) + image_size = (len(y_range), len(x_range)) + ic(image_size) + + # Create Each Channel + pixels = np.zeros((image_size), dtype=np.uint8) + + # set geotransform + nx = image_size[1] + ny = image_size[0] + + # Set the Pixel Data + for x, y in product(range(0, nx), range(0, ny)): + # Create a checker board effect + if bool(x % 2) == bool(y % 2): + pixels[y, x] = 255 + else: + pixels[y, x] = 0 + + # Create the transformation + # The raster should be aligned at the top-left of the image + # Whilst the grid is aligned at the bottom-left + xmin, ymin, xmax, ymax = bounding_box + top_left = ymax + chip_dimension - (ymax % chip_dimension) + geotransform = (xmin, chip_dimension, 0, top_left, 0, -chip_dimension) + + # Get the EPSG number as an int + result = re.match("EPSG:(?P\d+)", target_crs) + epsg_code = int(result.group("code")) + ic(epsg_code) + + # create the 3-band raster file + dst_ds = gdal.GetDriverByName("GTiff").Create( + f"{base_output_filename}.tiff", nx, ny, 1, gdal.GDT_Byte + ) - chips_gdf.to_file("chips.gpkg", layer=layer_name) - stage3_time = datetime.now() - ic(stage3_time - stage2_time) - ic(stage3_time - start_time) + dst_ds.SetGeoTransform(geotransform) # specify coords + srs = osr.SpatialReference() # establish encoding + srs.ImportFromEPSG(epsg_code) # WGS84 lat/long + dst_ds.SetProjection(srs.ExportToWkt()) # export coords to file + dst_ds.GetRasterBand(1).WriteArray(pixels) # write r-band to the raster + dst_ds.FlushCache() # write to disk + dst_ds = None diff --git a/pyveg/tests/test_grid_creator.py b/pyveg/tests/test_grid_creator.py new file mode 100644 index 00000000..65aad077 --- /dev/null +++ b/pyveg/tests/test_grid_creator.py @@ -0,0 +1,55 @@ +import pytest +from icecream import ic +from matplotlib.pyplot import gci + +import pyveg.src.grid_creator as grid_creator + + +def test_create_grid_of_chips(): + expected_bounding_box = [0, 0, 100, 100] + + actual_gdf = grid_creator.create_grid_of_chips( + chip_px_width=10, + pixel_scale=1, + target_crs="EPSG:27700", + bounding_box=expected_bounding_box, + ) + + assert len(actual_gdf) == 100 + + assert all([a == e for a, e in zip(actual_gdf.total_bounds, expected_bounding_box)]) + + +def test_get_chip_id(): + + # Assign a shorter name for readability + gcid = grid_creator.get_chip_id + + assert gcid(1, 2, 3) == "000001_0000002_003" + assert gcid(123456789, 123456789, 12345) == "123456789_123456789_12345" + + +def test_get_image_id_from_chip(): + + # Assign a shorter name for readability + c2id = grid_creator.get_image_id_from_chip + + # Cases where only the width changes + assert c2id("001536_0002048_032", 512) == "001536_0002048_512" + assert c2id("001536_0002048_032", 512) == "001536_0002048_512" + assert c2id("001536_0002048_016", 512) == "001536_0002048_512" + # Cases where the `x` value changes + assert c2id("001536_0002048_032", 1024) == "001024_0002048_1024" + assert c2id("001536_0002048_016", 1024) == "001024_0002048_1024" + # Cases there all three values change + assert c2id("123456_1234567_032", 1024) == "122880_1233920_1024" + assert c2id("123456_1234567_016", 1024) == "122880_1233920_1024" + + # This case should fail, because the requested chip size (16) is smaller than the source chip size (32) + with pytest.raises(ValueError): + c2id("123456_1234567_032", 16) + + +@pytest.mark.skip(reason="No suitable test data yet") +def test_coastline_to_poly(): + pass From 3b84773e0d3e6b30e0d7b073ec894bf0f4b43822 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 12 Aug 2022 09:35:14 +0100 Subject: [PATCH 06/12] Fix broken environment.yml --- environment.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/environment.yml b/environment.yml index 0eaec879..78be7211 100644 --- a/environment.yml +++ b/environment.yml @@ -1,10 +1,9 @@ -name: gee_pipeline2 +name: gee_pipeline channels: - defaults - conda-forge dependencies: - python=3.7 - earthengine-api - - geetools - rasterio - coverage From 7a5976d8df991924db4039ceaf782a2110f69c66 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 12 Aug 2022 09:53:56 +0100 Subject: [PATCH 07/12] fix missing dependancy --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 78be7211..11122582 100644 --- a/environment.yml +++ b/environment.yml @@ -7,3 +7,4 @@ dependencies: - earthengine-api - rasterio - coverage + - icecream From 4453ada659ea65e30f828f8271790a87c9172e24 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 12 Aug 2022 10:06:58 +0100 Subject: [PATCH 08/12] More missing dependencies --- environment.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/environment.yml b/environment.yml index 11122582..c992e9b0 100644 --- a/environment.yml +++ b/environment.yml @@ -8,3 +8,4 @@ dependencies: - rasterio - coverage - icecream + - gdal From 0b1e3bc596077dbf2a10633561296cf6e347e134 Mon Sep 17 00:00:00 2001 From: Andy Smith Date: Fri, 12 Aug 2022 11:05:07 +0100 Subject: [PATCH 09/12] fix linting error --- pyveg/src/download_modules.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyveg/src/download_modules.py b/pyveg/src/download_modules.py index e16e2513..367c3c2f 100644 --- a/pyveg/src/download_modules.py +++ b/pyveg/src/download_modules.py @@ -4,7 +4,6 @@ import imp import logging - import tempfile import ee From b63fe6dd68297009f252bfa5c7fc5eb0aa35c258 Mon Sep 17 00:00:00 2001 From: Camila Rangel Smith Date: Thu, 18 Aug 2022 14:25:41 +0100 Subject: [PATCH 10/12] Delete test_data_analysis_utils.py --- pyveg/tests/test_data_analysis_utils.py | 88 ------------------------- 1 file changed, 88 deletions(-) delete mode 100644 pyveg/tests/test_data_analysis_utils.py diff --git a/pyveg/tests/test_data_analysis_utils.py b/pyveg/tests/test_data_analysis_utils.py deleted file mode 100644 index 7b3b309c..00000000 --- a/pyveg/tests/test_data_analysis_utils.py +++ /dev/null @@ -1,88 +0,0 @@ -""" -Test the functions in data_analysis_utils.py -""" - -import json -import os -import shutil - -from pyveg.src.analysis_preprocessing import * -from pyveg.src.data_analysis_utils import * - - -def test_coarse_dataframe(): - - json_filename = os.path.join( - os.path.dirname(__file__), - "..", - "testdata", - "network_json_data/test-results-summary.json", - ) - results_dict = json.load(open(json_filename)) - test_df = read_json_to_dataframes(results_dict) - - data_df = convert_to_geopandas(test_df["COPERNICUS/S2"]) - - data_df = coarse_dataframe(data_df, 11) - - n_blocks = len(np.unique([i for i in data_df["category"]])) / len( - np.unique(data_df["date"]) - ) - - assert n_blocks == 2.0 - - -def test_create_lat_long_metric_figures(): - - dir_path = os.path.join( - os.path.dirname(__file__), "..", "testdata", "network_json_data/" - ) - - json_filename = os.path.join( - os.path.dirname(__file__), - "..", - "testdata", - "network_json_data/test-results-summary.json", - ) - summary_json = json.load(open(json_filename)) - test_df = read_json_to_dataframes(summary_json) - - data_df = convert_to_geopandas(test_df["COPERNICUS/S2"]) - - for filename in os.listdir(dir_path): - if filename.endswith(".png"): - os.unlink(os.path.join(dir_path, filename)) - tmp_png_path = os.path.join(dir_path, "tmp_png") - create_lat_long_metric_figures(data_df, "offset50", tmp_png_path) - - list_png_files = [ - f - for f in os.listdir(tmp_png_path) - if (os.path.isfile(os.path.join(tmp_png_path, f)) and f.endswith(".png")) - ] - len_dates = len(np.unique(data_df["date"])) - - assert len(list_png_files) == len_dates - # now delete the test png files - shutil.rmtree(tmp_png_path, ignore_errors=True) - - -def test_moving_window_analysis(): - - path_to_dict = os.path.join( - os.path.dirname(__file__), - "..", - "testdata", - "network_json_data/results_summary.json", - ) - results_json = json.load(open(path_to_dict)) - dfs = read_json_to_dataframes(results_json) - # make_time_series now returns a list - we only want first element - time_series_dfs = make_time_series(dfs.copy())[0] - - ar1_var_df = moving_window_analysis( - time_series_dfs, os.path.dirname(path_to_dict), 0.5 - ) - - keys_ar1 = list(ar1_var_df.keys()) - assert ar1_var_df.shape == (38, 9) From a2fc5d6db45971d1cce35bbc44128b1a726a0f61 Mon Sep 17 00:00:00 2001 From: Camila Rangel Smith Date: Thu, 18 Aug 2022 14:25:53 +0100 Subject: [PATCH 11/12] Delete test_analyse_gee_data.py --- pyveg/tests/test_analyse_gee_data.py | 40 ---------------------------- 1 file changed, 40 deletions(-) delete mode 100644 pyveg/tests/test_analyse_gee_data.py diff --git a/pyveg/tests/test_analyse_gee_data.py b/pyveg/tests/test_analyse_gee_data.py deleted file mode 100644 index 3132b1e8..00000000 --- a/pyveg/tests/test_analyse_gee_data.py +++ /dev/null @@ -1,40 +0,0 @@ -""" -Test the functions in analyse_gee_data.py -""" -import json -import os -import shutil - -from pyveg.scripts.analyse_gee_data import analyse_gee_data - - -def test_analyse_gee_data(): - - input_dir = os.path.join( - os.path.dirname(__file__), "..", "testdata", "network_json_data" - ) - analysis_path = os.path.join(input_dir, "analysis") - - # remove old tests - if os.path.exists(analysis_path): - shutil.rmtree(analysis_path) - - # run script - analyse_gee_data(input_dir, do_spatial=True) - - # assert script produced output - assert os.path.exists(analysis_path) == True - assert os.path.exists(os.path.join(analysis_path, "spatial")) == True - - list_png_files = [ - f - for f in os.listdir(os.path.join(input_dir, "analysis", "time-series")) - if ( - os.path.isfile( - os.path.join(os.path.join(input_dir, "analysis", "time-series"), f) - ) - and f.endswith(".png") - ) - ] - - assert len(list_png_files) > 0 From 2d22e5fda548c6da6eb6426ae6262d20735f1465 Mon Sep 17 00:00:00 2001 From: Camila Rangel Smith Date: Thu, 18 Aug 2022 14:26:16 +0100 Subject: [PATCH 12/12] Delete test_pattern_gen.py --- pyveg/tests/test_pattern_gen.py | 238 -------------------------------- 1 file changed, 238 deletions(-) delete mode 100644 pyveg/tests/test_pattern_gen.py diff --git a/pyveg/tests/test_pattern_gen.py b/pyveg/tests/test_pattern_gen.py deleted file mode 100644 index d1b87dd3..00000000 --- a/pyveg/tests/test_pattern_gen.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -Tests for the functions and methods in pattern_generation.py -""" -import os - -import numpy as np -import pytest - -from pyveg.src.pattern_generation import PatternGenerator as PG - - -def test_plant_change_zero(): - plant_biomass = 0.0 - soil_water = 0 - grazing_loss = 0.3 - senescence = 0.1 - growth_constant = 0.05 - uptake = 10.0 - uptake_saturation = 3.0 - assert ( - PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - == 0 - ) - - -def test_plant_decreases(): - # first get a benchmark - plant_biomass = 90.0 - soil_water = 0 - grazing_loss = 0.3 - senescence = 0.1 - growth_constant = 0.05 - uptake = 10.0 - uptake_saturation = 3.0 - change_1 = PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - - # now increase grazing_loss - grazing_loss = 0.5 - change_2 = PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - - assert change_1 > change_2 - - # now increase senescence - senescence = 0.2 - change_3 = PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - - assert change_2 > change_3 - - -def test_plant_increases(): - # first get a benchmark - plant_biomass = 90.0 - soil_water = 0 - grazing_loss = 0.3 - senescence = 0.1 - growth_constant = 0.05 - uptake = 10.0 - uptake_saturation = 3.0 - change_1 = PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - - # now increase the soil water - soil_water = 3.0 - change_2 = PG.calc_plant_change( - plant_biomass, - soil_water, - uptake, - uptake_saturation, - growth_constant, - senescence, - grazing_loss, - ) - - assert change_2 > change_1 - - -def test_surface_water_zero(): - # zero rainfall, zero starting water - rainfall = 0.0 - plant_biomass = 90.0 - surface_water = 0.0 - frac_available = 0.1 - bare_soil_infilt = 0.15 - infilt_saturation = 5 - change = PG.calc_surface_water_change( - surface_water, - plant_biomass, - rainfall, - frac_available, - bare_soil_infilt, - infilt_saturation, - ) - assert change == 0 - - -def test_surface_water_more_rain(): - # First get a benchmark - rainfall = 0.0 - plant_biomass = 90.0 - surface_water = 0.0 - frac_available = 0.1 - bare_soil_infilt = 0.15 - infilt_saturation = 5 - change_1 = PG.calc_surface_water_change( - surface_water, - plant_biomass, - rainfall, - frac_available, - bare_soil_infilt, - infilt_saturation, - ) - # now increase rainfall - rainfall = 1.4 - change_2 = PG.calc_surface_water_change( - surface_water, - plant_biomass, - rainfall, - frac_available, - bare_soil_infilt, - infilt_saturation, - ) - assert change_2 > change_1 - - -def test_surface_water_more_absorption(): - # First get a benchmark - rainfall = 1.4 - plant_biomass = 90.0 - surface_water = 3.0 - frac_available = 0.1 - bare_soil_infilt = 0.15 - infilt_saturation = 5 - change_1 = PG.calc_surface_water_change( - surface_water, - plant_biomass, - rainfall, - frac_available, - bare_soil_infilt, - infilt_saturation, - ) - # now increase soil absorption - bare_soil_infilt = 0.2 - change_2 = PG.calc_surface_water_change( - surface_water, - plant_biomass, - rainfall, - frac_available, - bare_soil_infilt, - infilt_saturation, - ) - assert change_2 < change_1 - - -def test_soil_water_change_zero(): - # no surface water, no plants, no evaporation - plant_biomass = 0 - soil_water = 3.0 - surface_water = 0 - frac_available = 0.1 - bare_soil_infilt = 0.15 - infilt_saturation = 5.0 - plant_growth = 0.0 - soil_water_evap = 0.0 - uptake_saturation = 3.0 - change = PG.calc_soil_water_change( - soil_water, - surface_water, - plant_biomass, - frac_available, - bare_soil_infilt, - infilt_saturation, - plant_growth, - soil_water_evap, - uptake_saturation, - ) - assert change == 0 - - -def test_plant_growth_quantitative(): - # create a PG object and check results against expected - pg = PG() - pg.set_rainfall(1.0) - starting_pattern_filename = os.path.join( - os.path.dirname(__file__), "..", "testdata", "binary_labyrinths_50.csv" - ) - pg.set_starting_pattern_from_file(starting_pattern_filename) - pg.initial_conditions() - pg.evolve_pattern(100) - - expected = np.loadtxt( - os.path.join( - os.path.dirname(__file__), "..", "testdata", "PG-1mm-100iterations.csv" - ), - delimiter=",", - ) - - assert ( - pg.plant_biomass.round(1) == expected.round(1) - ).all() # agreement to two decimal places