diff --git a/people-and-planet-ai/README.md b/people-and-planet-ai/README.md index 066672423ad1..134c73ec7512 100644 --- a/people-and-planet-ai/README.md +++ b/people-and-planet-ai/README.md @@ -2,8 +2,8 @@ ## 🦏 [Wildlife Insights -- _image-classification_](image-classification) -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/image-classification/README.ipynb) - +> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/image-classification/README.ipynb) +> > [Watch the video in YouTube
![thumbnail](http://img.youtube.com/vi/hUzODH3uGg0/0.jpg)](https://youtu.be/hUzODH3uGg0) This model is trained to recognize animal species from @@ -20,10 +20,12 @@ pictures. [AutoML]: https://cloud.google.com/vertex-ai/docs/beginner/beginners-guide [Vertex AI]: https://cloud.google.com/vertex-ai -## 🗺 [Global Fishing Watch -- _timeseries-classification_](timeseries-classification) +--- -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/timeseries-classification/README.ipynb) +## 🗺 [Global Fishing Watch -- _timeseries-classification_](timeseries-classification) +> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/timeseries-classification/README.ipynb) +> > [Watch the video in YouTube
![thumbnail](http://img.youtube.com/vi/LnEhSVEJUuY/0.jpg)](https://youtu.be/LnEhSVEJUuY) This model is trained to classify if a ship is fishing or not every hour from their @@ -41,10 +43,12 @@ location data. [Keras]: https://keras.io [Vertex AI]: https://cloud.google.com/vertex-ai -## 🏭 [Coal Plant Predictions -- _geospatial-classification_](geospatial-classification) +--- -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/geospatial-classification/README.ipynb) +## 🏭 [Coal Plant Predictions -- _geospatial-classification_](geospatial-classification) +> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/geospatial-classification/README.ipynb) +> > [Watch the video in YouTube
![thumbnail](http://img.youtube.com/vi/8amFK7T_n30/0.jpg)](https://youtu.be/8amFK7T_n30) This model uses satellite data to predict if a coal plant is turned on and producing carbon emissions. The satellite data comes from [Google Earth Engine.](https://earthengine.google.com/) @@ -60,10 +64,12 @@ This model uses satellite data to predict if a coal plant is turned on and produ [TensorFlow]: https://www.tensorflow.org/ [Vertex AI]: https://cloud.google.com/vertex-ai -## 🌍 [Land cover classification -- _image segmentation_](land-cover-classification) +--- -[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/land-cover-classification/README.ipynb) +## 🌍 [Land cover classification -- _image segmentation_](land-cover-classification) +> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/land-cover-classification/README.ipynb) +> > [Watch the video in YouTube
![thumbnail](http://img.youtube.com/vi/zImQf91ffFo/0.jpg)](https://youtu.be/zImQf91ffFo)

This model uses satellite data to classify what is on Earth. The satellite data comes from [Google Earth Engine.](https://earthengine.google.com/)
@@ -81,3 +87,22 @@ This model uses satellite data to classify what is on Earth. The satellite data
 [Earth Engine]: https://earthengine.google.com/
 [TensorFlow]: https://www.tensorflow.org/
 [Vertex AI]: https://cloud.google.com/vertex-ai
+
+---
+
+## 🌦 Weather forecasting -- _timeseries regression_
+
+> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/README.ipynb)
+
+
+This model uses satellite data to forecast precipitation for the next 2 and 6 hours. The satellite data comes from [Google Earth Engine.](https://earthengine.google.com/)
+
+* **Model**: 2D Fully Convolutional Network in [PyTorch]
+* **Creating datasets**: GPM precipitation and GOES-16 satellite data from [Earth Engine] with [Dataflow]
+* **Training the model**: [PyTorch] in [Vertex AI]
+* **Getting predictions**: [PyTorch] locally
+
+[Dataflow]: https://cloud.google.com/dataflow
+[Earth Engine]: https://earthengine.google.com/
+[PyTorch]: https://pytorch.org/
+[Vertex AI]: https://cloud.google.com/vertex-ai
diff --git a/people-and-planet-ai/conftest.py b/people-and-planet-ai/conftest.py
index 765a07ca6e72..fed54feb9b9d 100644
--- a/people-and-planet-ai/conftest.py
+++ b/people-and-planet-ai/conftest.py
@@ -14,18 +14,19 @@
 from __future__ import annotations

-from datetime import datetime
-import multiprocessing
 import os
 import platform
 import re
 import subprocess
-from unittest import mock
+import sys
+import textwrap
 import uuid

 from collections.abc import Callable, Iterable
+from datetime import datetime
+from unittest import mock

-from google.cloud import storage
 import pytest
+from google.cloud import storage


 @pytest.fixture(scope="session")
@@ -202,9 +203,9 @@ def run_notebook(
     skip_shell_commands: bool = False,
     until_end: bool = False,
 ) -> None:
+    import nbformat
     from nbclient.client import NotebookClient
     from nbclient.exceptions import CellExecutionError
-    import nbformat

     def notebook_filter_section(
         start: str,
@@ -262,13 +263,7 @@ def notebook_filter_section(
                 cmd = "pass"
                 cell["source"] = shell_command_re.sub(cmd, cell["source"])
             else:
-                cmd = [
-                    "import subprocess",
-                    "_cmd = f'''\\1'''",
-                    "print(f'>> {_cmd}')",
-                    "subprocess.run(_cmd, shell=True, check=True)",
-                ]
-                cell["source"] = shell_command_re.sub("\n".join(cmd), cell["source"])
+                cell["source"] = shell_command_re.sub(r"_run(f'''\1''')", cell["source"])

         # Apply variable substitutions.
         for regex, new_value in compiled_substitutions:
@@ -278,8 +273,38 @@ def notebook_filter_section(
         for old, new in replace.items():
             cell["source"] = cell["source"].replace(old, new)

+        # Clear outputs.
+        cell["outputs"] = []
+
     # Prepend the prelude cell.
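+    # The prelude defines `_run`, which the shell-command cells rewritten
+    # above call, so command stdout/stderr end up in the cell outputs.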
- nb.cells = [nbformat.v4.new_code_cell(prelude)] + nb.cells + prelude_src = textwrap.dedent( + """\ + def _run(cmd): + import subprocess as _sp + import sys as _sys + _p = _sp.run(cmd, shell=True, stdout=_sp.PIPE, stderr=_sp.PIPE) + _stdout = _p.stdout.decode('utf-8').strip() + _stderr = _p.stderr.decode('utf-8').strip() + if _stdout: + print(f'➜ !{cmd}') + print(_stdout) + if _stderr: + print(f'➜ !{cmd}', file=_sys.stderr) + print(_stderr, file=_sys.stderr) + if _p.returncode: + raise RuntimeError('\\n'.join([ + f"Command returned non-zero exit status {_p.returncode}.", + f"-------- command --------", + f"{cmd}", + f"-------- stderr --------", + f"{_stderr}", + f"-------- stdout --------", + f"{_stdout}", + ])) + """ + + prelude + ) + nb.cells = [nbformat.v4.new_code_cell(prelude_src)] + nb.cells # Run the notebook. error = "" @@ -289,7 +314,16 @@ def notebook_filter_section( except CellExecutionError as e: # Remove colors and other escape characters to make it easier to read in the logs. # https://stackoverflow.com/a/33925425 - error = re.sub(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]", "", str(e)) + color_chars = re.compile(r"(\x9B|\x1B\[)[0-?]*[ -\/]*[@-~]") + error = color_chars.sub("", str(e)) + for cell in nb.cells: + if cell["cell_type"] != "code": + continue + for output in cell["outputs"]: + if output.get("name") == "stdout": + print(color_chars.sub("", output["text"])) + elif output.get("name") == "stderr": + print(color_chars.sub("", output["text"]), file=sys.stderr) if error: raise RuntimeError( @@ -305,6 +339,8 @@ def run_notebook_parallel( replace: dict[str, str] = {}, skip_shell_commands: bool = False, ) -> None: + import multiprocessing + args = [ { "ipynb_file": ipynb_file, diff --git a/people-and-planet-ai/weather-forecasting/.gitignore b/people-and-planet-ai/weather-forecasting/.gitignore new file mode 100644 index 000000000000..80dbcaa43747 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/.gitignore @@ -0,0 +1,7 @@ +**/*.egg-info +**/build +**/dist +checkpoints/ +data/ +data-training/ +model/ diff --git a/people-and-planet-ai/weather-forecasting/README.md b/people-and-planet-ai/weather-forecasting/README.md new file mode 100644 index 000000000000..f1404762eb32 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/README.md @@ -0,0 +1,16 @@ +## 🌦 Weather forecasting -- _timeseries regression_ + +> [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/README.ipynb) + + +This model uses satellite data to forecast precipitation for the next 2 and 6 hours. 
The satellite data comes from [Google Earth Engine.](https://earthengine.google.com/)
+
+* **Model**: 2D Fully Convolutional Network in [PyTorch]
+* **Creating datasets**: GPM precipitation and GOES-16 satellite data from [Earth Engine] with [Dataflow]
+* **Training the model**: [PyTorch] in [Vertex AI]
+* **Getting predictions**: [PyTorch] locally
+
+[Dataflow]: https://cloud.google.com/dataflow
+[Earth Engine]: https://earthengine.google.com/
+[PyTorch]: https://pytorch.org/
+[Vertex AI]: https://cloud.google.com/vertex-ai
diff --git a/people-and-planet-ai/weather-forecasting/create_dataset.py b/people-and-planet-ai/weather-forecasting/create_dataset.py
new file mode 100644
index 000000000000..99307e233ffd
--- /dev/null
+++ b/people-and-planet-ai/weather-forecasting/create_dataset.py
@@ -0,0 +1,237 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Creates a dataset to train a machine learning model."""
+
+from __future__ import annotations
+
+from collections.abc import Iterator
+from datetime import datetime, timedelta
+import logging
+import random
+from typing import List, Optional
+import uuid
+
+import apache_beam as beam
+from apache_beam.io.filesystems import FileSystems
+from apache_beam.options.pipeline_options import PipelineOptions
+import ee
+import numpy as np
+import requests
+
+# Default values.
+NUM_DATES = 100
+MAX_REQUESTS = 20  # default EE request quota
+MIN_BATCH_SIZE = 100
+
+# Constants.
+NUM_BINS = 10
+MAX_PRECIPITATION = 30  # found empirically
+MAX_ELEVATION = 6000  # found empirically
+PATCH_SIZE = 5
+START_DATE = datetime(2017, 7, 10)
+END_DATE = datetime.now() - timedelta(days=30)
+POLYGON = [(-140.0, 60.0), (-140.0, -60.0), (-10.0, -60.0), (-10.0, 60.0)]
+
+
+def sample_points(date: datetime, num_bins: int = NUM_BINS) -> Iterator[tuple]:
+    """Selects around the same number of points for every classification.
+
+    Since our labels are numeric continuous values, we convert them into
+    integers within a predefined range. Each integer value is treated
+    as a different classification.
+
+    From analyzing the precipitation data, most values are between 0 mm/hr
+    and 30 mm/hr of precipitation (rain and snow), but values above 30 mm/hr
+    still exist. So we clamp the values to between 0 mm/hr and 30 mm/hr,
+    and then bucketize them.
+
+    We do the same for the elevation, and finally get a "unique" bin number
+    by combining the precipitation and elevation bins. We do this because
+    most of the precipitation values occur near zero elevation, so the data
+    would otherwise be extremely biased.
+
+    Args:
+        date: The date of interest.
+        num_bins: Number of bins to bucketize values.
+
+    Yields: (date, lon_lat) pairs.
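+
+    As a worked example with the defaults above (num_bins=10): a pixel with
+    15 mm/hr of precipitation maps to bin int(15 / 30 * 9) = 4, an elevation
+    of 3000 m maps to bin int(3000 / 6000 * 9) = 4, so its unique bin is
+    4 * 10 + 4 = 44.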
+    """
+    from weather import data
+
+    precipitation_bins = (
+        data.get_gpm(date)
+        .clamp(0, MAX_PRECIPITATION)
+        .divide(MAX_PRECIPITATION)
+        .multiply(num_bins - 1)
+        .uint8()
+    )
+    elevation_bins = (
+        data.get_elevation()
+        .clamp(0, MAX_ELEVATION)
+        .divide(MAX_ELEVATION)
+        .multiply(num_bins - 1)
+        .uint8()
+    )
+    unique_bins = elevation_bins.multiply(num_bins).add(precipitation_bins)
+    points = unique_bins.stratifiedSample(
+        numPoints=1,
+        region=ee.Geometry.Polygon(POLYGON),
+        scale=data.SCALE,
+        geometries=True,
+    )
+    for point in points.toList(points.size()).getInfo():
+        yield (date, point["geometry"]["coordinates"])
+
+
+def get_training_example(
+    date: datetime, point: tuple, patch_size: int = PATCH_SIZE
+) -> tuple:
+    """Gets an (inputs, labels) training example.
+
+    Args:
+        date: The date of interest.
+        point: A (longitude, latitude) coordinate.
+        patch_size: Size in pixels of the surrounding square patch.
+
+    Returns: An (inputs, labels) pair of NumPy arrays.
+    """
+    from weather import data
+
+    return (
+        data.get_inputs_patch(date, point, patch_size),
+        data.get_labels_patch(date, point, patch_size),
+    )
+
+
+def try_get_example(date: datetime, point: tuple) -> Iterator[tuple]:
+    """Wrapper over `get_training_example` that simply logs errors instead of crashing."""
+    try:
+        yield get_training_example(date, point)
+    except (requests.exceptions.HTTPError, ee.ee_exception.EEException) as e:
+        logging.error(f"🛑 failed to get example: {date} {point}")
+        logging.exception(e)
+
+
+def write_npz(batch: list[tuple[np.ndarray, np.ndarray]], data_path: str) -> str:
+    """Writes an (inputs, labels) batch into a compressed NumPy file.
+
+    Args:
+        batch: Batch of (inputs, labels) pairs of NumPy arrays.
+        data_path: Directory path to save files to.
+
+    Returns: The filename of the data file.
+    """
+    filename = FileSystems.join(data_path, f"{uuid.uuid4()}.npz")
+    with FileSystems.create(filename) as f:
+        inputs = [x for (x, _) in batch]
+        labels = [y for (_, y) in batch]
+        np.savez_compressed(f, inputs=inputs, labels=labels)
+    logging.info(filename)
+    return filename
+
+
+def run(
+    data_path: str,
+    num_dates: int = NUM_DATES,
+    num_bins: int = NUM_BINS,
+    max_requests: int = MAX_REQUESTS,
+    min_batch_size: int = MIN_BATCH_SIZE,
+    beam_args: Optional[List[str]] = None,
+) -> None:
+    """Runs an Apache Beam pipeline to create a dataset.
+
+    This fetches data from Earth Engine and writes compressed NumPy files.
+    We use `max_requests` to limit the number of concurrent requests to Earth Engine
+    to avoid quota issues. You can request a quota increase if you need it.
+
+    Args:
+        data_path: Directory path to save the data files.
+        num_dates: Number of dates to extract data points from.
+        num_bins: Number of bins to bucketize values.
+        max_requests: Limit the number of concurrent requests to Earth Engine.
+        min_batch_size: Minimum number of examples to write per data file.
+        beam_args: Apache Beam command line arguments to parse as pipeline options.
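+
+    Example (illustrative local run; any Beam-supported output path works):
+        run("data/", num_dates=10, min_batch_size=100)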
+ """ + random_dates = [ + START_DATE + (END_DATE - START_DATE) * random.random() for _ in range(num_dates) + ] + + beam_options = PipelineOptions( + beam_args, + save_main_session=True, + direct_num_workers=max(max_requests, MAX_REQUESTS), # direct runner + max_num_workers=max_requests, # distributed runners + ) + with beam.Pipeline(options=beam_options) as pipeline: + ( + pipeline + | "📆 Random dates" >> beam.Create(random_dates) + | "📌 Sample points" >> beam.FlatMap(sample_points, num_bins) + | "🃏 Reshuffle" >> beam.Reshuffle() + | "📑 Get example" >> beam.FlatMapTuple(try_get_example) + | "🗂️ Batch examples" >> beam.BatchElements(min_batch_size) + | "📝 Write NPZ files" >> beam.Map(write_npz, data_path) + ) + + +def main() -> None: + import argparse + + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--data-path", + required=True, + help="Directory path to save the data files", + ) + parser.add_argument( + "--num-dates", + type=int, + default=NUM_DATES, + help="Number of dates to extract data points from.", + ) + parser.add_argument( + "--num-bins", + type=int, + default=NUM_BINS, + help="Number of bins to bucketize values.", + ) + parser.add_argument( + "--max-requests", + type=int, + default=MAX_REQUESTS, + help="Limit the number of concurrent requests to Earth Engine.", + ) + parser.add_argument( + "--min-batch-size", + type=int, + default=MIN_BATCH_SIZE, + help="Minimum number of examples to write per data file.", + ) + args, beam_args = parser.parse_known_args() + + run( + data_path=args.data_path, + num_dates=args.num_dates, + num_bins=args.num_bins, + max_requests=args.max_requests, + min_batch_size=args.min_batch_size, + beam_args=beam_args, + ) + + +if __name__ == "__main__": + main() diff --git a/people-and-planet-ai/weather-forecasting/images/elevation.png b/people-and-planet-ai/weather-forecasting/images/elevation.png new file mode 100644 index 000000000000..de1eb6cc35fe Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/images/elevation.png differ diff --git a/people-and-planet-ai/weather-forecasting/images/goes16.png b/people-and-planet-ai/weather-forecasting/images/goes16.png new file mode 100644 index 000000000000..7c2e671d4b7b Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/images/goes16.png differ diff --git a/people-and-planet-ai/weather-forecasting/images/gpm.png b/people-and-planet-ai/weather-forecasting/images/gpm.png new file mode 100644 index 000000000000..d0362ea318fb Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/images/gpm.png differ diff --git a/people-and-planet-ai/weather-forecasting/images/inputs.png b/people-and-planet-ai/weather-forecasting/images/inputs.png new file mode 100644 index 000000000000..b471a0b6cfbd Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/images/inputs.png differ diff --git a/people-and-planet-ai/weather-forecasting/images/labels.png b/people-and-planet-ai/weather-forecasting/images/labels.png new file mode 100644 index 000000000000..7bf81a096a2d Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/images/labels.png differ diff --git a/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb b/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb new file mode 100644 index 000000000000..88ddea4c44c8 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb @@ -0,0 +1,588 @@ +{ + "cells": [ + { + "cell_type": "code", + 
"execution_count": null, + "metadata": { + "cellView": "form", + "id": "g4jtzXwEvW2-" + }, + "outputs": [], + "source": [ + "#@title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License." + ], + "id": "g4jtzXwEvW2-" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HtysPAVSvcMg" + }, + "source": [ + "# 🌦️ Weather forecasting\n", + "\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb)\n", + "\n", + "This sample is broken into the following notebooks:\n", + "\n", + "* ![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧭 Overview**:\n", + " Go through what we want to achieve, and explore the data we want to use as _inputs and outputs_ for our model.\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🗄️ Create the dataset**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb):\n", + " Use [Apache Beam](https://beam.apache.org/)\n", + " to fetch data from [Earth Engine](https://earthengine.google.com/) in parallel, and create a dataset for our model in [Dataflow](https://cloud.google.com/dataflow).\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb):\n", + " Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n", + " Get predictions from the model with data it has never seen before.\n", + "\n", + "This sample leverages geospatial satellite and precipitation data from [Google Earth Engine](https://earthengine.google.com/).\n", + "Using satellite imagery, you'll build and train a model for rain \"nowcasting\" i.e. 
predicting the amount of rainfall for a given geospatial region and time in the immediate future.\n",
+        "\n",
+        "* ⏲️ **Time estimate**: ~5 minutes\n",
+        "* 💰 **Cost estimate**: _free_\n",
+        "\n",
+        "💚 This is one of many **machine learning how-to samples** inspired by **real climate solutions** aired on the [People and Planet AI 🎥 series](https://www.youtube.com/playlist?list=PLIivdWyY5sqI-llB35Dcb187ZG155Rs_7)."
+      ],
+      "id": "HtysPAVSvcMg"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AENPVmeYwqml"
+      },
+      "source": [
+        "## 📒 Using this interactive notebook\n",
+        "\n",
+        "Click the **run** icons ▶️ of each section within this notebook.\n",
+        "\n",
+        "![Run cell](images/run-cell.png)\n",
+        "\n",
+        "> 💡 Alternatively, you can run the currently selected cell with `Ctrl + Enter` (or `⌘ + Enter` on a Mac).\n",
+        "\n",
+        "This **notebook code lets you train and deploy an ML model** end-to-end. When you run a code cell, the code runs in the notebook's runtime, so you're not making any changes to your personal computer.\n",
+        "\n",
+        "> ⚠️ **To avoid any errors**, wait for each section to finish, in order, before clicking the next “run” icon.\n",
+        "\n",
+        "This sample must be connected to a **Google Cloud project**; nothing else is needed.\n",
+        "\n",
+        "You can use an _existing project_. Alternatively, you can create a new Cloud project [with cloud credits for free.](https://cloud.google.com/free/docs/gcp-free-tier)"
+      ],
+      "id": "AENPVmeYwqml"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "aCDdVrxbw8je"
+      },
+      "source": [
+        "# 🎬 Before you begin\n",
+        "\n",
+        "Let's start by cloning the GitHub repository, and installing some dependencies."
+      ],
+      "id": "aCDdVrxbw8je"
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Now let's get the code from GitHub and navigate to the sample.\n",
+        "!git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git\n",
+        "%cd python-docs-samples/people-and-planet-ai/weather-forecasting"
+      ],
+      "metadata": {
+        "id": "W-fPxkYD9FaP"
+      },
+      "id": "W-fPxkYD9FaP",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "# Upgrade `setuptools` to install packages from pyproject.toml files.\n",
+        "!pip install --quiet --upgrade --no-warn-conflicts pip setuptools\n",
+        "\n",
+        "# Install the `weather-data` local package.\n",
+        "!pip install serving/weather-data"
+      ],
+      "metadata": {
+        "id": "AlcsK6pd-x0I"
+      },
+      "id": "AlcsK6pd-x0I",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "mHvEEW6oyFGV"
+      },
+      "source": [
+        "## ☁️ My Google Cloud resources\n",
+        "\n",
+        "Make sure you have followed these steps to configure your Google Cloud project:\n",
+        "\n",
+        "1. Enable the APIs: _Earth Engine_\n",
+        "\n",
+        " \n",
+        "\n",
+        "1. Register your\n",
+        "   [Compute Engine default service account](https://console.cloud.google.com/iam-admin/iam)\n",
+        "   on Earth Engine.\n",
+        "\n",
+        " \n",
+        "\n",
+        "Once you have everything ready, you can go ahead and fill in your Google Cloud resources in the following code cell.\n",
+        "Make sure you run it!"
+      ],
+      "id": "mHvEEW6oyFGV"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "YMPNUR0pyRvy"
+      },
+      "outputs": [],
+      "source": [
+        "from __future__ import annotations\n",
+        "\n",
+        "import os\n",
+        "from google.colab import auth\n",
+        "\n",
+        "# Please fill in these values.\n",
+        "project = \"\" #@param {type:\"string\"}\n",
+        "\n",
+        "# Quick input validations.\n",
+        "assert project, \"⚠️ Please provide a Google Cloud project ID\"\n",
+        "\n",
+        "# Authenticate to Colab.\n",
+        "auth.authenticate_user()\n",
+        "\n",
+        "# Set GOOGLE_CLOUD_PROJECT for google.auth.default().\n",
+        "os.environ['GOOGLE_CLOUD_PROJECT'] = project\n",
+        "\n",
+        "# Set the gcloud project for other gcloud commands.\n",
+        "!gcloud config set project {project}"
+      ],
+      "id": "YMPNUR0pyRvy"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "bp05WbBM596J"
+      },
+      "source": [
+        "# 🧭 Overview\n",
+        "\n",
+        "The goal of our model is to use satellite images for _weather forecasting_.\n",
+        "Specifically, we want to predict the amount of rainfall, measured in millimeters per hour, for 2 and 6 hours into the future.\n",
+        "This kind of short-term forecasting is called [weather _nowcasting_](https://en.wikipedia.org/wiki/Nowcasting_(meteorology)).\n",
+        "\n",
+        "When working with satellite data, each image has the shape `(width, height, bands)`.\n",
+        "**Bands** contain _numeric values_ for each pixel in the image, like the measurements from specific satellite instruments for different ranges of the electromagnetic spectrum, or the probabilities of different classifications.\n",
+        "If you're familiar with image classification problems, you can think of the bands as similar to an image's RGB channels."
+      ],
+      "id": "bp05WbBM596J"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "n6X3DTeYYAXM"
+      },
+      "source": [
+        "## ☔️ Precipitation\n",
+        "\n",
+        "We use [NASA's Global Precipitation Measurement (GPM)](https://developers.google.com/earth-engine/datasets/catalog/NASA_GPM_L3_IMERG_V06) to get the amount of _precipitation_ (rain and snow), measured in millimeters per hour.\n",
+        "We're interested in the `precipitationCal` band, which gives us the _calibrated_ precipitation amount.\n",
+        "\n",
+        "This is what we want to predict, so we'll use these values for our _labels_.\n",
+        "But it's also useful for the model to look at the precipitation from the _past_, so we'll also use it as _inputs_.\n",
+        "\n",
+        "In the [`serving/data.py`](serving/data.py) file, we defined a function called `get_gpm_sequence` which returns an `ee.Image` with the precipitation values for the time sequence we give it.\n",
+        "Each time step is stored in a different band with the index as a prefix.\n",
+        "For example, the band corresponding to the first time step in the sequence would be `0_precipitationCal`, and the second time step would be `1_precipitationCal`."
+      ],
+      "id": "n6X3DTeYYAXM"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "57gC1gfC9_Gc"
+      },
+      "outputs": [],
+      "source": [
+        "from datetime import datetime\n",
+        "import folium\n",
+        "import ee\n",
+        "\n",
+        "from weather.data import get_gpm_sequence\n",
+        "\n",
+        "def gpm_layer(image: ee.Image, label: str, i: int) -> folium.TileLayer:\n",
+        "    vis_params = {\n",
+        "        \"bands\": [f\"{i}_precipitationCal\"],\n",
+        "        \"min\": 0.0,\n",
+        "        \"max\": 20.0,\n",
+        "        \"palette\": [\n",
+        "            '000096', '0064ff', '00b4ff', '33db80', '9beb4a',\n",
+        "            'ffeb00', 'ffb300', 'ff6400', 'eb1e00', 'af0000',\n",
+        "        ],\n",
+        "    }\n",
+        "    # Mask (hide) pixels with no precipitation to see the map below.\n",
+        "    image = image.mask(image.gt(0.1))\n",
+        "    return folium.TileLayer(\n",
+        "        name=f\"[{label}] Precipitation\",\n",
+        "        tiles=image.getMapId(vis_params)[\"tile_fetcher\"].url_format,\n",
+        "        attr='Map Data © Google Earth Engine',\n",
+        "        overlay=True,\n",
+        "    )\n",
+        "\n",
+        "# Get the Earth Engine images.\n",
+        "dates = [datetime(2019, 9, 2, 18)]\n",
+        "image = get_gpm_sequence(dates)\n",
+        "\n",
+        "# Show map.\n",
+        "map = folium.Map([25, -90], zoom_start=5)\n",
+        "for i, date in enumerate(dates):\n",
+        "    gpm_layer(image, str(date), i).add_to(map)\n",
+        "folium.LayerControl().add_to(map)\n",
+        "map"
+      ],
+      "id": "57gC1gfC9_Gc"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "ztJlKSjlMGAc"
+      },
+      "source": [
+        "![Global Precipitation Measurement (GPM)](images/gpm.png)\n",
+        "\n",
+        "> 💡 This is [Hurricane Dorian](https://en.wikipedia.org/wiki/Hurricane_Dorian), the strongest Category 5 hurricane on record in the Bahamas."
+      ],
+      "id": "ztJlKSjlMGAc"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "y3NRvQndX66i"
+      },
+      "source": [
+        "## 🌨 Cloud and moisture\n",
+        "\n",
+        "To predict precipitation, it's also useful to take a look at the _cloud_ and _moisture_.\n",
+        "We use data from [GOES-16 Cloud and Moisture Imagery](https://developers.google.com/earth-engine/datasets/catalog/NOAA_GOES_16_MCMIPF); GOES-16 is the first satellite of the GOES-R series in the [Geostationary Operational Environmental Satellites (GOES)](https://en.wikipedia.org/wiki/Geostationary_Operational_Environmental_Satellite) mission, operated by [NASA](https://en.wikipedia.org/wiki/NASA) and [NOAA](https://en.wikipedia.org/wiki/National_Oceanic_and_Atmospheric_Administration).\n",
+        "It includes measurements from the _visible_, _near-infrared_, and _infrared_ spectrum.\n",
+        "It is a [geostationary](https://en.wikipedia.org/wiki/Geostationary_orbit) satellite, so its orbit is synchronized with the Earth's rotation, and it provides a view centered on the Americas.\n",
+        "\n",
+        "In the [`serving/data.py`](serving/data.py) file, we defined a function called `get_goes16_sequence` which returns an `ee.Image` with the cloud and moisture data for the time sequence we give it."
+      ],
+      "id": "y3NRvQndX66i"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-T5BGzzZH57f"
+      },
+      "outputs": [],
+      "source": [
+        "from datetime import datetime\n",
+        "import folium\n",
+        "import ee\n",
+        "\n",
+        "from weather.data import get_goes16_sequence\n",
+        "\n",
+        "def goes16_layer(image: ee.Image, label: str, i: int) -> folium.TileLayer:\n",
+        "    vis_params = {\n",
+        "        \"bands\": [f\"{i}_CMI_C02\", f\"{i}_CMI_C03\", f\"{i}_CMI_C01\"],\n",
+        "        \"min\": 0.0,\n",
+        "        \"max\": 3000.0,\n",
+        "    }\n",
+        "    return folium.TileLayer(\n",
+        "        name=f\"[{label}] Cloud and moisture\",\n",
+        "        tiles=image.getMapId(vis_params)[\"tile_fetcher\"].url_format,\n",
+        "        attr='Map Data © Google Earth Engine',\n",
+        "        overlay=True,\n",
+        "    )\n",
+        "\n",
+        "# Get the Earth Engine image.\n",
+        "dates = [datetime(2019, 9, 2, 18)]\n",
+        "image = get_goes16_sequence(dates)\n",
+        "\n",
+        "# Show map.\n",
+        "map = folium.Map([25, -90], zoom_start=5)\n",
+        "for i, date in enumerate(dates):\n",
+        "    goes16_layer(image, str(date), i).add_to(map)\n",
+        "folium.LayerControl().add_to(map)\n",
+        "map"
+      ],
+      "id": "-T5BGzzZH57f"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "![GOES 16](images/goes16.png)"
+      ],
+      "metadata": {
+        "id": "WdpLS9eYaltD"
+      },
+      "id": "WdpLS9eYaltD"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "gqUhsl1UE2Xs"
+      },
+      "source": [
+        "## 🏔 Elevation\n",
+        "\n",
+        "Elevation could also give the model useful information.\n",
+        "We use the [MERIT Terrain DEM](https://developers.google.com/earth-engine/datasets/catalog/MERIT_DEM_v1_0_3) dataset to get the elevation.\n",
+        "\n",
+        "In the [`serving/data.py`](serving/data.py) file, we defined a function called `get_elevation` which returns an `ee.Image` with the elevation measured in meters."
+      ],
+      "id": "gqUhsl1UE2Xs"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "vv_HM9f-KExw"
+      },
+      "outputs": [],
+      "source": [
+        "import folium\n",
+        "\n",
+        "from weather.data import get_elevation\n",
+        "\n",
+        "def elevation_layer() -> folium.TileLayer:\n",
+        "    image = get_elevation()\n",
+        "    vis_params = {\n",
+        "        \"bands\": [\"elevation\"],\n",
+        "        \"min\": 0.0,\n",
+        "        \"max\": 3000.0,\n",
+        "        \"palette\": ['000000', '478FCD', '86C58E', 'AFC35E', '8F7131', 'B78D4F', 'E2B8A6', 'FFFFFF']\n",
+        "    }\n",
+        "    return folium.TileLayer(\n",
+        "        name=\"Elevation\",\n",
+        "        tiles=image.getMapId(vis_params)[\"tile_fetcher\"].url_format,\n",
+        "        attr='Map Data © Google Earth Engine',\n",
+        "        overlay=True,\n",
+        "    )\n",
+        "\n",
+        "# Show map.\n",
+        "map = folium.Map([25, -90], zoom_start=5)\n",
+        "elevation_layer().add_to(map)\n",
+        "folium.LayerControl().add_to(map)\n",
+        "map"
+      ],
+      "id": "vv_HM9f-KExw"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "![Elevation](images/elevation.png)"
+      ],
+      "metadata": {
+        "id": "KfueoVPBapZp"
+      },
+      "id": "KfueoVPBapZp"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "AO4CyGgjYOME"
+      },
+      "source": [
+        "## 🛰 Inputs\n",
+        "\n",
+        "In this example, we also consider multiple images across time, since weather forecasting is more accurate when we look at how the cloud cover changes over a period of time.\n",
+        "In particular, we consider 3 data points: 4 hours prior, 2 hours prior, and the current time.\n",
+        "\n",
+        "> 💡 To give the model a better picture, we chose to feed it with _at least three_ data points from the past.\n",
+        "> With only a single point, the model wouldn't know if the rain is increasing or decreasing.\n",
+        "> Two points would give it an idea of the trend.\n",
+        "> Three or more points would give it an idea of how fast it's changing.\n",
+        "> The more points, the more it can see.\n",
+        "\n",
+        "In the [`serving/data.py`](serving/data.py) file, we defined a function called `get_inputs_image` which returns an `ee.Image` with bands for all the time steps for cloud and moisture and for precipitation, along with the elevation."
+      ],
+      "id": "AO4CyGgjYOME"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "5SbSErwSUCtZ"
+      },
+      "outputs": [],
+      "source": [
+        "from datetime import datetime, timedelta\n",
+        "import folium\n",
+        "\n",
+        "from weather.data import get_inputs_image\n",
+        "\n",
+        "# Get the Earth Engine image.\n",
+        "date = datetime(2019, 9, 2, 18)\n",
+        "image = get_inputs_image(date)\n",
+        "\n",
+        "# Get 4 hours prior, 2 hours prior, and current time.\n",
+        "input_hour_deltas = [-4, -2, 0]\n",
+        "\n",
+        "# Show map.\n",
+        "map = folium.Map([25, -90], zoom_start=5)\n",
+        "elevation_layer().add_to(map)\n",
+        "for i, h in enumerate(input_hour_deltas):\n",
+        "    label = str(date + timedelta(hours=h))\n",
+        "    goes16_layer(image, label, i).add_to(map)\n",
+        "    gpm_layer(image, label, i).add_to(map)\n",
+        "folium.LayerControl().add_to(map)\n",
+        "map"
+      ],
+      "id": "5SbSErwSUCtZ"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "IMOd2hVAp2h9"
+      },
+      "source": [
+        "![Inputs](images/inputs.png)\n",
+        "\n",
+        "> 💡 You can hide and show layers from the top-right corner widget to see all the inputs for the model."
+ ], + "id": "IMOd2hVAp2h9" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "kRZlrlaXYRA0" + }, + "source": [ + "## ✅ Labels\n", + "\n", + "We chose to predict precipitation for 2 and 6 hours in the future, but it could be anything as long as we have the right _labels_.\n", + "\n", + "In the [`serving/data.py`](serving/data.py) file, we defined a function called `get_labels_image` which returns us an `ee.Image` with bands for each time step of precipitation." + ], + "id": "kRZlrlaXYRA0" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ppYBbnWyWGCR" + }, + "outputs": [], + "source": [ + "from datetime import datetime, timedelta\n", + "import folium\n", + "\n", + "from weather.data import get_labels_image, OUTPUT_HOUR_DELTAS\n", + "\n", + "# Get the Earth Engine image.\n", + "date = datetime(2019, 9, 3, 18)\n", + "image = get_labels_image(date)\n", + "\n", + "# Show map.\n", + "map = folium.Map([25, -90], zoom_start=5)\n", + "for i, h in enumerate(OUTPUT_HOUR_DELTAS):\n", + " label = str(date + timedelta(hours=h))\n", + " gpm_layer(image, label, i).add_to(map)\n", + "folium.LayerControl().add_to(map)\n", + "map" + ], + "id": "ppYBbnWyWGCR" + }, + { + "cell_type": "markdown", + "source": [ + "![Labels](images/labels.png)" + ], + "metadata": { + "id": "XdVso6ela0Hz" + }, + "id": "XdVso6ela0Hz" + }, + { + "cell_type": "markdown", + "source": [ + "# ⛳️ What's next?\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🗄️ Create the dataset**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb):\n", + " Use [Apache Beam](https://beam.apache.org/)\n", + " to fetch data from [Earth Engine](https://earthengine.google.com/) in parallel, and create a dataset for our model in [Dataflow](https://cloud.google.com/dataflow).\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb):\n", + " Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n", + " Get predictions from the model with data it has never seen before." 
+ ], + "metadata": { + "id": "jzGYXELPApGN" + }, + "id": "jzGYXELPApGN" + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-gpu.2-6.m82", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m82" + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb b/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb new file mode 100644 index 000000000000..38e3e4c88646 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb @@ -0,0 +1,841 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "g4jtzXwEvW2-" + }, + "outputs": [], + "source": [ + "#@title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License." 
+      ],
+      "id": "g4jtzXwEvW2-"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "HtysPAVSvcMg"
+      },
+      "source": [
+        "# 🌦️ Weather forecasting\n",
+        "\n",
+        "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb)\n",
+        "\n",
+        "This sample is broken into the following notebooks:\n",
+        "\n",
+        "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧭 Overview**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb):\n",
+        "  Go through what we want to achieve, and explore the data we want to use as _inputs and outputs_ for our model.\n",
+        "\n",
+        "* ![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🗄️ Create the dataset**:\n",
+        "  Use [Apache Beam](https://beam.apache.org/) to fetch data from [Earth Engine](https://earthengine.google.com/) in parallel, and create a dataset for our model in [Dataflow](https://cloud.google.com/dataflow).\n",
+        "\n",
+        "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb):\n",
+        "  Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n",
+        "\n",
+        "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n",
+        "  Get predictions from the model with data it has never seen before.\n",
+        "\n",
+        "This sample leverages geospatial satellite and precipitation data from [Google Earth Engine](https://earthengine.google.com/).\n",
+        "Using satellite imagery, you'll build and train a model for rain \"nowcasting\", i.e. predicting the amount of rainfall for a given geospatial region and time in the immediate future.\n",
+        "\n",
+        "* ⏲️ **Time estimate**: ~30 minutes\n",
+        "* 💰 **Cost estimate**: [a few cents on Dataflow](https://cloud.google.com/dataflow/pricing)\n",
+        "\n",
+        "💚 This is one of many **machine learning how-to samples** inspired by **real climate solutions** aired on the [People and Planet AI 🎥 series](https://www.youtube.com/playlist?list=PLIivdWyY5sqI-llB35Dcb187ZG155Rs_7)."
+      ],
+      "id": "HtysPAVSvcMg"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RuFZck60B8t-"
+      },
+      "source": [
+        "# 🎬 Before you begin\n",
+        "\n",
+        "Let's start by cloning the GitHub repository, and installing some dependencies."
+ ], + "id": "RuFZck60B8t-" + }, + { + "cell_type": "code", + "source": [ + "# Now let's get the code from GitHub and navigate to the sample.\n", + "!git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git\n", + "%cd python-docs-samples/people-and-planet-ai/weather-forecasting" + ], + "metadata": { + "id": "W-fPxkYD9FaP" + }, + "execution_count": null, + "outputs": [], + "id": "W-fPxkYD9FaP" + }, + { + "cell_type": "markdown", + "source": [ + "The [`weather-data`](serving/weather-data) local package contains the functions to get data from Earth Engine.\n", + "It is used for both creating the training dataset, and for predictions." + ], + "metadata": { + "id": "3zK5C00LIXIv" + }, + "id": "3zK5C00LIXIv" + }, + { + "cell_type": "code", + "source": [ + "# Upgrade `setuptools` to install packages from pyproject.toml files.\n", + "!pip install --quiet --upgrade --no-warn-conflicts pip setuptools\n", + "\n", + "# We need `build` and `virtualenv` to build the local packages.\n", + "!pip install --quiet build virtualenv\n", + "\n", + "# Install Apache Beam and the `weather-data` local package.\n", + "!pip install apache-beam[gcp] serving/weather-data" + ], + "metadata": { + "id": "AlcsK6pd-x0I" + }, + "execution_count": null, + "outputs": [], + "id": "AlcsK6pd-x0I" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G75Y6HszxBL8" + }, + "source": [ + "> **🛑 Restart the runtime 🛑**\n", + "\n", + "Colab already comes with many dependencies pre-loaded.\n", + "In order to ensure everything runs as expected, we **_must_ restart the runtime**. This allows Colab to load the latest versions of the libraries.\n", + "\n", + "![\"Runtime\" > \"Restart runtime\"](images/restart-runtime.png)" + ], + "id": "G75Y6HszxBL8" + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "id": "xGXRHJ9TFs24" + }, + "outputs": [], + "source": [ + "# Alternatively, restart the runtime by ending the process.\n", + "exit()" + ], + "id": "xGXRHJ9TFs24" + }, + { + "cell_type": "markdown", + "source": [ + "After restarting the runtime, let's navigate back into the sample directory." + ], + "metadata": { + "id": "WI_vvBpPD4tr" + }, + "id": "WI_vvBpPD4tr" + }, + { + "cell_type": "code", + "source": [ + "%cd python-docs-samples/people-and-planet-ai/weather-forecasting" + ], + "metadata": { + "id": "6fdyXMdlD3cz" + }, + "id": "6fdyXMdlD3cz", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mHvEEW6oyFGV" + }, + "source": [ + "## ☁️ My Google Cloud resources\n", + "\n", + "Make sure you have followed these steps to configure your Google Cloud project:\n", + "\n", + "1. Enable the APIs: _Dataflow and Earth Engine_\n", + "\n", + " \n", + "\n", + "1. Create or use an existing Cloud Storage bucket.\n", + "\n", + " \n", + "\n", + "1. Register your\n", + " [Compute Engine default service account](https://console.cloud.google.com/iam-admin/iam)\n", + " on Earth Engine.\n", + "\n", + " \n", + "\n", + "Once you have everything ready, you can go ahead and fill in your Google Cloud resources in the following code cell.\n", + "Make sure you run it!" 
+      ],
+      "id": "mHvEEW6oyFGV"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "YMPNUR0pyRvy"
+      },
+      "outputs": [],
+      "source": [
+        "from __future__ import annotations\n",
+        "\n",
+        "import os\n",
+        "from google.colab import auth\n",
+        "\n",
+        "# Please fill in these values.\n",
+        "project = \"\" #@param {type:\"string\"}\n",
+        "bucket = \"\" #@param {type:\"string\"}\n",
+        "location = \"us-central1\" #@param {type:\"string\"}\n",
+        "\n",
+        "# Quick input validations.\n",
+        "assert project, \"⚠️ Please provide a Google Cloud project ID\"\n",
+        "assert bucket, \"⚠️ Please provide a Cloud Storage bucket name\"\n",
+        "assert not bucket.startswith('gs://'), f\"⚠️ Please remove the gs:// prefix from the bucket name: {bucket}\"\n",
+        "assert location, \"⚠️ Please provide a Google Cloud location\"\n",
+        "\n",
+        "# Authenticate to Colab.\n",
+        "auth.authenticate_user()\n",
+        "\n",
+        "# Set GOOGLE_CLOUD_PROJECT for google.auth.default().\n",
+        "os.environ['GOOGLE_CLOUD_PROJECT'] = project\n",
+        "\n",
+        "# Set the gcloud project for other gcloud commands.\n",
+        "!gcloud config set project {project}"
+      ],
+      "id": "YMPNUR0pyRvy"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ud_SLexIZQh0"
+      },
+      "source": [
+        "# 🗄 Create the dataset locally\n",
+        "\n",
+        "A dataset consists of _training examples_, which are `(inputs, labels)` pairs, so for each input, we have to provide the correct output values.\n",
+        "\n",
+        "We want a _balanced_ dataset consisting of a representative, diverse, and unbiased selection of data points.\n",
+        "This way the model can learn from many different examples covering different seasons, times of day, regions, ecosystems, etc.\n",
+        "\n",
+        "Let's take a closer look at how we select our training examples to create the dataset."
+      ],
+      "id": "Ud_SLexIZQh0"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "hWq2BMYMcAEj"
+      },
+      "source": [
+        "## 📌 Sample points\n",
+        "\n",
+        "First, we want to get balanced points for a given time.\n",
+        "We use [`ee.Image.stratifiedSample`](https://developers.google.com/earth-engine/apidocs/ee-image-stratifiedsample) to select around the same number of points for each amount of precipitation.\n",
+        "Also, most of the regions we're selecting data points from are at very low elevations, near sea level.\n",
+        "So it's important to make sure we select data points from different elevations in a balanced way.\n",
+        "\n",
+        "Since the precipitation is a continuous value, we first need to convert it to a classification.\n",
+        "By looking at different images, we noticed that most values fall between 0 and 30.\n",
+        "So we simply clamped the values into that range, divided by the maximum value, multiplied by the number of bins, and converted them into integers.\n",
+        "\n",
+        "We do a similar thing for the elevation, where we found empirically that most values fall between 0 and 6000.\n",
+        "\n",
+        "Once we have bins for both precipitation and elevation, we combine them into a single \"unique\" bin value to make sure we get all the possible precipitation values for each elevation.\n",
+        "\n",
+        "In [`create_dataset.py`](create_dataset.py) we defined a function called `sample_points` that gives us a balanced selection of `(longitude, latitude)` coordinates for a given date."
+ ], + "id": "hWq2BMYMcAEj" + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "UXtjqla1dBuX", + "outputId": "6e01e8f6-7b45-4104-91a5-f9f3152438a8" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2019-09-02 18:00:00 -- [-69.5525524841715, -39.82132539507417]\n", + "2019-09-02 18:00:00 -- [-71.4390145808225, 1.9503353164835744]\n", + "2019-09-02 18:00:00 -- [-52.12523597225278, -20.956704428564223]\n", + "2019-09-02 18:00:00 -- [-75.66109641618425, 34.11002248796244]\n", + "2019-09-02 18:00:00 -- [-37.662359897928496, 51.2678444146453]\n", + "2019-09-02 18:00:00 -- [-87.15953205291412, 5.902922566609462]\n", + "2019-09-02 18:00:00 -- [-70.27120471146712, 3.4774712994867656]\n", + "2019-09-02 18:00:00 -- [-45.208208284532475, -25.358449320749884]\n", + "2019-09-02 18:00:00 -- [-121.5650074346918, 8.058879248496325]\n", + "2019-09-02 18:00:00 -- [-127.7633828951165, 12.909781782741732]\n", + "2019-09-02 18:00:00 -- [-110.96488708208145, 53.96279026700387]\n", + "2019-09-02 18:00:00 -- [-50.957426102897415, -25.358449320749884]\n", + "2019-09-02 18:00:00 -- [-63.80333466580656, 2.399492958543334]\n", + "2019-09-02 18:00:00 -- [-50.957426102897415, -25.98727001963354]\n", + "2019-09-02 18:00:00 -- [-47.723491080067134, -22.034682769507654]\n", + "2019-09-02 18:00:00 -- [-71.6186776376464, 18.29967348745886]\n", + "2019-09-02 18:00:00 -- [-115.63612655950296, 38.421935851736144]\n", + "2019-09-02 18:00:00 -- [-71.07968846717469, 19.10815724316643]\n", + "2019-09-02 18:00:00 -- [-71.52884610923445, 18.120010430634963]\n", + "2019-09-02 18:00:00 -- [-71.6186776376464, 18.209841959046912]\n", + "2019-09-02 18:00:00 -- [-111.5937077809651, 45.428795067868414]\n", + "2019-09-02 18:00:00 -- [-101.71223965565036, 19.467483356814242]\n", + "2019-09-02 18:00:00 -- [-91.11211930304002, 14.706412350980784]\n", + "2019-09-02 18:00:00 -- [-65.86945981928146, -24.46013403663035]\n", + "2019-09-02 18:00:00 -- [-91.74094000192368, 15.06573846462858]\n", + "2019-09-02 18:00:00 -- [-91.02228777462807, 14.796243879392733]\n", + "2019-09-02 18:00:00 -- [-68.29491108640417, -20.956704428564223]\n", + "2019-09-02 18:00:00 -- [-69.01356331369979, -27.424574474224784]\n", + "2019-09-02 18:00:00 -- [-71.4390145808225, -15.926138837494904]\n", + "2019-09-02 18:00:00 -- [-68.56440567164003, -24.729628621866212]\n", + "2019-09-02 18:00:00 -- [-68.56440567164003, -27.155079888988922]\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "from create_dataset import sample_points\n", + "\n", + "date = datetime(2019, 9, 2, 18)\n", + "for date, point in sample_points(date):\n", + " print(f\"{date} -- {point}\")" + ], + "id": "UXtjqla1dBuX" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SEEz_F5yxI_-" + }, + "source": [ + "> 💡 We only bucketize the precipitation to select a balanced dataset, but we use the original continuous value for the labels." 
+      ],
+      "id": "SEEz_F5yxI_-"
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "W5mr765Ahsd5"
+      },
+      "source": [
+        "## 📑 Get training examples\n",
+        "\n",
+        "The next step is to get the data for our training examples.\n",
+        "Sometimes there are transient errors like sending too many requests, so we used [`Retry`](https://googleapis.dev/python/google-api-core/latest/retry.html) to handle those cases.\n",
+        "\n",
+        "We predefined that all our training examples would be 5 pixels wide by 5 pixels high, but we could choose any size as long as the model accepts it.\n",
+        "We also want all the training examples to be the same size so we can batch them.\n",
+        "\n",
+        "In [`create_dataset.py`](create_dataset.py) we defined `get_training_example`, which fetches an `(inputs, labels)` pair for the given date and (longitude, latitude) coordinate.\n",
+        "Let's see what a 64x64 patch looks like, since a 5x5 patch will only look like a bunch of random pixels to us."
+      ],
+      "id": "W5mr765Ahsd5"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 5,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "A2zgm2cgze3O",
+        "outputId": "209f3fa8-1e1b-4980-96f9-3824127619bb"
+      },
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "inputs : float32 (64, 64, 52)\n",
+            "labels : float32 (64, 64, 2)\n"
+          ]
+        }
+      ],
+      "source": [
+        "from datetime import datetime\n",
+        "from create_dataset import get_training_example\n",
+        "\n",
+        "date = datetime(2019, 9, 2, 18)\n",
+        "point = [-77.93, 25.23]  # [longitude, latitude]\n",
+        "(inputs, labels) = get_training_example(date, point, patch_size=64)\n",
+        "\n",
+        "print(f\"inputs : {inputs.dtype} {inputs.shape}\")\n",
+        "print(f\"labels : {labels.dtype} {labels.shape}\")"
+      ],
+      "id": "A2zgm2cgze3O"
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Let's see what the example inputs look like."
+      ],
+      "metadata": {
+        "id": "oC1g2FfkbWge"
+      },
+      "id": "oC1g2FfkbWge"
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 6,
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 517
+        },
+        "id": "596JD7WPkQse",
+        "outputId": "daec247e-dcbd-4006-f18c-ab01ae91ce04"
+      },
+      "outputs": [
+        {
+          "output_type": "display_data",
+          "data": {
+            "text/html": [
+              "\n",
+              "\n",
+              "\n",
+              "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "from visualize import show_inputs\n", + "\n", + "show_inputs(inputs)" + ], + "id": "596JD7WPkQse" + }, + { + "cell_type": "markdown", + "source": [ + "And these are the labels for that example, corresponding to 2 and 6 hours in the future from the example's time." + ], + "metadata": { + "id": "zoIY9Sl8baIQ" + }, + "id": "zoIY9Sl8baIQ" + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 317 + }, + "id": "y_PJN4znnOAa", + "outputId": "7aa7e607-b9e2-48e1-9f88-2b9ae18cb3ac" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n",
+ "\n",
+ ""
+ ]
+ },
+ "metadata": {}
+ }
+ ],
+ "source": [
+ "from visualize import show_outputs\n",
+ "\n",
+ "show_outputs(labels)"
+ ],
+ "id": "y_PJN4znnOAa"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "> 💡 We chose _5x5 patches_ because our Fully Convolutional Network uses a _3x3 kernel_.\n",
+ "> We want a _balanced_ representation of precipitation, and we did the stratified sampling on the _center_ pixel only.\n",
+ "> By choosing 5x5 patches with a 3x3 kernel, we make sure the center pixel we chose appears in all 9 positions of the kernel."
+ ],
+ "metadata": {
+ "id": "a8UeWX-ZbUJK"
+ },
+ "id": "a8UeWX-ZbUJK"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "ihD1Y3rUhavO"
+ },
+ "source": [
+ "## 📝 Write NumPy files\n",
+ "\n",
+ "Finally, we need to write the training examples into files.\n",
+ "We chose [compressed NumPy files](https://numpy.org/doc/stable/reference/generated/numpy.savez_compressed.html) for simplicity.\n",
+ "We used Apache Beam [`FileSystems`](https://beam.apache.org/releases/pydoc/current/apache_beam.io.filesystems.html) to be able to write into any file system that Beam supports, including Cloud Storage.\n",
+ "\n",
+ "Before writing the examples, we batch them to create files containing multiple examples, rather than a single file per example.\n",
+ "This reduces I/O operations when reading the dataset during training.\n",
+ "\n",
+ "Here, let's create a batch from a single example, but our data creation pipeline will create larger batches."
+ ],
+ "id": "ihD1Y3rUhavO"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 35
+ },
+ "id": "sOs0xl4MBCqz",
+ "outputId": "6829925d-822d-4ade-ce89-6e713a3db325"
+ },
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "'data/c6482680-3d3f-43bd-bdff-e447f600f2b9.npz'"
+ ],
+ "application/vnd.google.colaboratory.intrinsic+json": {
+ "type": "string"
+ }
+ },
+ "metadata": {},
+ "execution_count": 24
+ }
+ ],
+ "source": [
+ "from create_dataset import write_npz\n",
+ "\n",
+ "data_path = \"data/\"\n",
+ "batch = [(inputs, labels)]\n",
+ "write_npz(batch, data_path)"
+ ],
+ "id": "sOs0xl4MBCqz"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!ls -lh data"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "tcD44qxkSSya",
+ "outputId": "2a818de7-128e-4200-f196-f629e698d985"
+ },
+ "id": "tcD44qxkSSya",
+ "execution_count": 25,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 412K\n",
+ "-rw-r--r-- 1 root root 412K Jan 11 00:12 c6482680-3d3f-43bd-bdff-e447f600f2b9.npz\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "G0-AAvhqB5K_"
+ },
+ "source": [
+ "## 🗃 Create the dataset\n",
+ "\n",
+ "Now, we put it all together into an\n",
+ "[Apache Beam](https://beam.apache.org/) pipeline, which runs all of these steps in parallel.\n",
+ "We can even save directly to [Cloud Storage](https://cloud.google.com/storage).\n",
+ "\n",
+ "Let's see how to create a small dataset from a single date!"
+ ],
+ "id": "G0-AAvhqB5K_"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "RRl4TsFQB4ZM"
+ },
+ "outputs": [],
+ "source": [
+ "import apache_beam as beam\n",
+ "from apache_beam.options.pipeline_options import PipelineOptions\n",
+ "\n",
+ "data_path = f\"gs://{bucket}/weather/data-small\"\n",
+ "dates = [datetime(2019, 9, 2, 18)]\n",
+ "\n",
+ "beam_options = PipelineOptions([], direct_num_workers=20)\n",
+ "with beam.Pipeline(options=beam_options) as pipeline:\n",
+ " (\n",
+ " pipeline\n",
+ " | \"📆 Create dates\" >> beam.Create(dates)\n",
+ " | \"📌 Sample points\" >> beam.FlatMap(sample_points)\n",
+ " | \"🃏 Reshuffle\" >> beam.Reshuffle()\n",
+ " | \"📑 Get example\" >> beam.MapTuple(get_training_example)\n",
+ " | \"🗂️ Batch examples\" >> beam.BatchElements()\n",
+ " | \"📝 Write NPZ files\" >> beam.Map(write_npz, data_path)\n",
+ " )"
+ ],
+ "id": "RRl4TsFQB4ZM"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now we can take a look at our data files."
+ ],
+ "metadata": {
+ "id": "2WPw9nK7cTuJ"
+ },
+ "id": "2WPw9nK7cTuJ"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "F43OAIlrDosG"
+ },
+ "outputs": [],
+ "source": [
+ "!gsutil ls -lh gs://{bucket}/weather/data-small"
+ ],
+ "id": "F43OAIlrDosG"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YWAI6AetcxRH"
+ },
+ "source": [
+ "# ☁️ Create the dataset in Dataflow\n",
+ "\n",
+ "Local testing works great for creating small datasets and making sure everything works, but to run on a large dataset at scale, it's best to use a distributed runner like\n",
+ "[Dataflow](https://cloud.google.com/dataflow).\n",
+ "\n",
+ "We can run [`create_dataset.py`](create_dataset.py) as a script to launch the pipeline in [Dataflow](https://cloud.google.com/dataflow).\n",
+ "You can control the number of dates to sample with `--num-dates` _(default=100)_, and the number of bins to use for the stratified sampling with `--num-bins` _(default=10)_.\n",
+ "\n",
+ "We are using the same data extraction functions for both training and prediction.\n",
+ "This means our Dataflow pipeline needs access to the [`serving/weather-data`](serving/weather-data) module.\n",
+ "Since it's a local module that does not live in [PyPI](https://pypi.org), we have to first build the module with [`build`](https://pypa-build.readthedocs.io/en/latest) and then include the resulting package in the Dataflow job."
+ ], + "id": "YWAI6AetcxRH" + }, + { + "cell_type": "code", + "source": [ + "# Build the `weather-data` package.\n", + "!python -m build serving/weather-data" + ], + "metadata": { + "id": "s8UyBgzVRctp" + }, + "id": "s8UyBgzVRctp", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "!ls -lh serving/weather-data/dist" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "1NtAJBl0TKyE", + "outputId": "516fb9b4-328a-4d41-af2a-028448559882" + }, + "id": "1NtAJBl0TKyE", + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 8.0K\n", + "-rw-r--r-- 1 root root 3.9K Jan 10 23:51 weather_data-1.0.0-py3-none-any.whl\n", + "-rw-r--r-- 1 root root 3.1K Jan 10 23:51 weather-data-1.0.0.tar.gz\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "dMkZE6yedIxx" + }, + "outputs": [], + "source": [ + "!python create_dataset.py \\\n", + " --data-path=\"gs://{bucket}/weather/data\" \\\n", + " --runner=\"DataflowRunner\" \\\n", + " --project=\"{project}\" \\\n", + " --region=\"{location}\" \\\n", + " --temp_location=\"gs://{bucket}/weather/temp\" \\\n", + " --extra_package=\"./serving/weather-data/dist/weather-data-1.0.0.tar.gz\"" + ], + "id": "dMkZE6yedIxx" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "SUXVI-EAew02" + }, + "source": [ + "> 💡 Look at your Dataflow jobs: https://console.cloud.google.com/dataflow/jobs" + ], + "id": "SUXVI-EAew02" + }, + { + "cell_type": "markdown", + "source": [ + "# ⛳️ What's next?\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb):\n", + " Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n", + " Get predictions from the model with data it has never seen before." 
+ ], + "metadata": { + "id": "rv12FbfdFUHo" + }, + "id": "rv12FbfdFUHo" + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-gpu.2-6.m82", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m82" + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb b/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb new file mode 100644 index 000000000000..d84619aebc32 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb @@ -0,0 +1,1450 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "g4jtzXwEvW2-" + }, + "outputs": [], + "source": [ + "#@title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License." 
+ ],
+ "id": "g4jtzXwEvW2-"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "HtysPAVSvcMg"
+ },
+ "source": [
+ "# 🌦️ Weather forecasting -- _Training_\n",
+ "\n",
+ "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb)\n",
+ "\n",
+ "This sample is broken into the following notebooks:\n",
+ "\n",
+ "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧭 Overview**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb):\n",
+ " Go through what we want to achieve, and explore the data we want to use as _inputs and outputs_ for our model.\n",
+ "\n",
+ "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🗄️ Create the dataset**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb):\n",
+ " Use [Apache Beam](https://beam.apache.org/) to fetch data from [Earth Engine](https://earthengine.google.com/) in parallel, and create a dataset for our model in [Dataflow](https://cloud.google.com/dataflow).\n",
+ "\n",
+ "* ![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**:\n",
+ " Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n",
+ "\n",
+ "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n",
+ " Get predictions from the model with data it has never seen before.\n",
+ "\n",
+ "This sample leverages geospatial satellite and precipitation data from [Google Earth Engine](https://earthengine.google.com/).\n",
+ "Using satellite imagery, you'll build and train a model for rain \"nowcasting\", i.e. predicting the amount of rainfall for a given geospatial region and time in the immediate future.\n",
+ "\n",
+ "* ⏲️ **Time estimate**: ~40 minutes\n",
+ "* 💰 **Cost estimate**: [a few cents on Vertex AI](https://cloud.google.com/vertex-ai/pricing#custom-trained_models)\n",
+ "\n",
+ "💚 This is one of many **machine learning how-to samples** inspired by **real climate solutions** aired on the [People and Planet AI 🎥 series](https://www.youtube.com/playlist?list=PLIivdWyY5sqI-llB35Dcb187ZG155Rs_7)."
+ ],
+ "id": "HtysPAVSvcMg"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "RuFZck60B8t-"
+ },
+ "source": [
+ "# 🎬 Before you begin\n",
+ "\n",
+ "Let's start by cloning the GitHub repository and installing some dependencies."
+ ], + "id": "RuFZck60B8t-" + }, + { + "cell_type": "code", + "source": [ + "# Now let's get the code from GitHub and navigate to the sample.\n", + "!git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git\n", + "%cd python-docs-samples/people-and-planet-ai/weather-forecasting" + ], + "metadata": { + "id": "W-fPxkYD9FaP" + }, + "execution_count": null, + "outputs": [], + "id": "W-fPxkYD9FaP" + }, + { + "cell_type": "markdown", + "source": [ + "The [`weather-model`](serving/weather-model) local package contains the model definition and the training script.\n", + "This ensures we use the same model definition for both training and predictions.\n" + ], + "metadata": { + "id": "r5OijZcuInAe" + }, + "id": "r5OijZcuInAe" + }, + { + "cell_type": "code", + "source": [ + "# Upgrade `setuptools` to install packages from pyproject.toml files.\n", + "!pip install --quiet --upgrade --no-warn-conflicts pip setuptools\n", + "\n", + "# We need `build` and `virtualenv` to build the local packages.\n", + "!pip install --quiet build virtualenv\n", + "\n", + "# Install the `weather-model` local package.\n", + "!pip install google-cloud-aiplatform serving/weather-model" + ], + "metadata": { + "id": "AlcsK6pd-x0I" + }, + "execution_count": null, + "outputs": [], + "id": "AlcsK6pd-x0I" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "G75Y6HszxBL8" + }, + "source": [ + "> **🛑 Restart the runtime 🛑**\n", + "\n", + "Colab already comes with many dependencies pre-loaded.\n", + "In order to ensure everything runs as expected, we **_must_ restart the runtime**. This allows Colab to load the latest versions of the libraries.\n", + "\n", + "![\"Runtime\" > \"Restart runtime\"](images/restart-runtime.png)" + ], + "id": "G75Y6HszxBL8" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xGXRHJ9TFs24" + }, + "outputs": [], + "source": [ + "# Alternatively, restart the runtime by ending the process.\n", + "exit()" + ], + "id": "xGXRHJ9TFs24" + }, + { + "cell_type": "markdown", + "source": [ + "After restarting the runtime, let's navigate back into the sample directory." + ], + "metadata": { + "id": "WI_vvBpPD4tr" + }, + "id": "WI_vvBpPD4tr" + }, + { + "cell_type": "code", + "source": [ + "%cd python-docs-samples/people-and-planet-ai/weather-forecasting" + ], + "metadata": { + "id": "6fdyXMdlD3cz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "457ec966-1f2a-4e76-df1b-62d7d9d77e60" + }, + "execution_count": 12, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[Errno 2] No such file or directory: 'python-docs-samples/people-and-planet-ai/weather-forecasting'\n", + "/content/python-docs-samples/people-and-planet-ai/weather-forecasting/python-docs-samples/people-and-planet-ai/weather-forecasting\n" + ] + } + ], + "id": "6fdyXMdlD3cz" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mHvEEW6oyFGV" + }, + "source": [ + "## ☁️ My Google Cloud resources\n", + "\n", + "Make sure you have followed these steps to configure your Google Cloud project:\n", + "\n", + "1. Enable the APIs: _Vertex AI_\n", + "\n", + " \n", + "\n", + "1. Create or use an existing Cloud Storage bucket.\n", + "\n", + " \n", + "\n", + "Once you have everything ready, you can go ahead and fill in your Google Cloud resources in the following code cell.\n", + "Make sure you run it!" 
+ ], + "id": "mHvEEW6oyFGV" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "YMPNUR0pyRvy" + }, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import os\n", + "from google.colab import auth\n", + "\n", + "# Please fill in these values.\n", + "project = \"\" #@param {type:\"string\"}\n", + "bucket = \"\" #@param {type:\"string\"}\n", + "location = \"us-central1\" #@param {type:\"string\"}\n", + "\n", + "# Quick input validations.\n", + "assert project, \"⚠️ Please provide a Google Cloud project ID\"\n", + "assert bucket, \"⚠️ Please provide a Cloud Storage bucket name\"\n", + "assert not bucket.startswith('gs://'), f\"⚠️ Please remove the gs:// prefix from the bucket name: {bucket}\"\n", + "assert location, \"⚠️ Please provide a Google Cloud location\"\n", + "\n", + "# Authenticate to Colab.\n", + "auth.authenticate_user()\n", + "\n", + "# Set GOOGLE_CLOUD_PROJECT for google.auth.default().\n", + "os.environ['GOOGLE_CLOUD_PROJECT'] = project\n", + "\n", + "# Set the gcloud project for other gcloud commands.\n", + "!gcloud config set project {project}" + ], + "id": "YMPNUR0pyRvy" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "02b1b9dd" + }, + "source": [ + "# 🧠 Train the model locally\n", + "\n", + "We need our model for both training and for prediction.\n", + "So we created the local [`weather-model`](serving/weather-model) module.\n", + "It contains [`weather/model.py`](serving/weather-model/weather/model.py) where the model is defined, and [`weather/trainer.py`](serving/weather-model/weather/trainer.py) where all the training code lives." + ], + "id": "02b1b9dd" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "PY5H3OMjfVAR" + }, + "source": [ + "## 📖 Read the dataset\n", + "\n", + "Unfortunately, PyTorch cannot read files from Cloud Storage out of the box.\n", + "Fortunately, Vertex AI uses [Cloud Storage FUSE](https://cloud.google.com/blog/products/ai-machine-learning/cloud-storage-file-system-ai-training) to mount and access Cloud Storage files as if they were local files.\n", + "\n", + "For now, let's download the data files we created in the [🗄️ **Create the dataset**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb) notebook to have them locally." 
+ ],
+ "id": "PY5H3OMjfVAR"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "data_path_gcs = f\"gs://{bucket}/weather/data\"\n",
+ "\n",
+ "!mkdir -p data-training\n",
+ "!gsutil -m cp {data_path_gcs}/* data-training"
+ ],
+ "metadata": {
+ "id": "h_IUpnqvO-sa"
+ },
+ "id": "h_IUpnqvO-sa",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "First, we need to load the dataset to feed it to the model.\n",
+ "To read a dataset in PyTorch, we could manually instantiate a subclass of `torch.utils.data.Dataset`, but we're going to use [Hugging Face 🤗 Datasets](https://huggingface.co/docs/datasets/main/en/index), which provide a higher-level interface for working with datasets.\n",
+ "\n",
+ "Our data files are compressed NumPy files, which we can easily load with NumPy.\n",
+ "To load them into a 🤗 Dataset, we can use [`Dataset.from_dict`](https://huggingface.co/docs/datasets/main/en/loading#python-dictionary) and pass it a dictionary containing all the file names of our data files.\n",
+ "Then, we use [`Dataset.map`](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.map) to read the data files and process the examples in parallel.\n",
+ "Additionally, we _augment_ the data by rotating and flipping each example.\n",
+ "To split our dataset into training and testing/validation subsets, we use [`Dataset.train_test_split`](https://huggingface.co/docs/datasets/main/en/package_reference/main_classes#datasets.Dataset.train_test_split).\n",
+ "\n",
+ "In [`weather/trainer.py`](serving/weather-model/weather/trainer.py) we defined the `read_dataset` function, which loads our data files and returns a 🤗 Dataset with train/test splits."
+ ],
+ "metadata": {
+ "id": "Pl3qbyggO7rR"
+ },
+ "id": "Pl3qbyggO7rR"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from weather.trainer import read_dataset\n",
+ "\n",
+ "data_path = 'data-training'\n",
+ "train_test_ratio = 0.9 # 90% train, 10% test\n",
+ "\n",
+ "# Read the dataset with train/test splits.\n",
+ "dataset = read_dataset(data_path, train_test_ratio)"
+ ],
+ "metadata": {
+ "id": "rxwvw7ihacXy"
+ },
+ "id": "rxwvw7ihacXy",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "print(dataset)"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "ItTBWR98dByh",
+ "outputId": "8c522562-9937-4dc5-e482-8bec3cdba277"
+ },
+ "id": "ItTBWR98dByh",
+ "execution_count": 16,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "DatasetDict({\n",
+ " train: Dataset({\n",
+ " features: ['inputs', 'labels'],\n",
+ " num_rows: 3069\n",
+ " })\n",
+ " test: Dataset({\n",
+ " features: ['inputs', 'labels'],\n",
+ " num_rows: 341\n",
+ " })\n",
+ "})\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "> 💡 For more information on loading data into a 🤗 Dataset, refer to the [Loading data](https://huggingface.co/docs/datasets/main/en/loading) guide.\n",
+ "\n",
+ "🤗 Datasets allow for random access just like PyTorch Datasets.\n",
+ "\n",
+ "Let's see the shapes of the first training example from the `train` split.\n",
+ "When we access an example, we get an `{'inputs': list, 'labels': list}` dictionary, where each value is a [Python list](https://docs.python.org/3/library/stdtypes.html#list).\n",
+ "We can then convert them into [PyTorch tensors](https://pytorch.org/docs/stable/tensors.html) for further use."
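> A quick aside before we index into the dataset: the rotate-and-flip augmentation mentioned above can be done with plain NumPy. The sketch below is hypothetical, not the actual `weather/trainer.py` code; it produces the 8 rotations/reflections of one example.

```python
# Hypothetical sketch of rotate/flip augmentation; the real implementation
# lives in serving/weather-model/weather/trainer.py.
import numpy as np

def augmented(inputs: np.ndarray, labels: np.ndarray):
    """Yields the 8 rotations/reflections of a (width, height, channels) pair."""
    for flipped in (False, True):
        x = np.flip(inputs, axis=0) if flipped else inputs
        y = np.flip(labels, axis=0) if flipped else labels
        for k in range(4):  # rotations by 0, 90, 180, and 270 degrees
            yield np.rot90(x, k, axes=(0, 1)), np.rot90(y, k, axes=(0, 1))

example_inputs = np.zeros((5, 5, 52), np.float32)
example_labels = np.zeros((5, 5, 2), np.float32)
print(sum(1 for _ in augmented(example_inputs, example_labels)))  # prints 8
```

Each stored example therefore contributes eight training patches.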
+ ],
+ "metadata": {
+ "id": "jnlg80Tl4QLS"
+ },
+ "id": "jnlg80Tl4QLS"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "train_dataset = dataset['train']\n",
+ "example = train_dataset[0] # random access the first element\n",
+ "\n",
+ "print(f\"inputs: {torch.as_tensor(example['inputs']).shape}\")\n",
+ "print(f\"labels: {torch.as_tensor(example['labels']).shape}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "Ji67MIKQ58Zr",
+ "outputId": "7e98452a-2744-4a72-93e8-9a53e1f6b695"
+ },
+ "id": "Ji67MIKQ58Zr",
+ "execution_count": 17,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "inputs: torch.Size([5, 5, 52])\n",
+ "labels: torch.Size([5, 5, 2])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "The _inputs_ have the shape `(width, height, num_inputs)`, where each input is the value of an Earth Engine band.\n",
+ "\n",
+ "The _outputs_ have the shape `(width, height, num_outputs)`, where each output is a prediction.\n",
+ "We're predicting for 2 and 6 hours into the future, so we get 2 outputs."
+ ],
+ "metadata": {
+ "id": "JWFJY1pv7T91"
+ },
+ "id": "JWFJY1pv7T91"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "oQnLpK0OmutA"
+ },
+ "source": [
+ "## 📓 Define the model\n",
+ "\n",
+ "First we define our model, which is a very simple _Fully Convolutional Network_.\n",
+ "The input data can consist of potentially very large numbers, but models generally train better with small numbers, roughly between -1 and 1.\n",
+ "So in [`weather/model.py`](serving/weather-model/weather/model.py) we defined a `Normalization` layer which applies [Z-Score](https://developers.google.com/machine-learning/data-prep/transform/normalization#z-score) to normalize all the model's inputs as a first step.\n",
+ "But we need to provide it with the [_mean_](https://en.wikipedia.org/wiki/Mean) and [_standard deviation_](https://en.wikipedia.org/wiki/Standard_deviation) from the training dataset.\n",
+ "\n",
+ "A model always processes _batches_ of inputs, so we always get an extra _first_ dimension.\n",
+ "This means that for all the layers in the model, our inputs have the shape `(batch, width, height, num_inputs)`, and our outputs have the shape `(batch, width, height, num_outputs)`.\n",
+ "\n",
+ "We need to calculate the mean and standard deviation for each input, so each band is normalized within its own range.\n",
+ "Both the mean and standard deviation have the shape `(1, 1, 1, num_inputs)`, which allows them to _broadcast_ to any batch size, width, and height, as long as the `num_inputs` match."
+ ],
+ "id": "oQnLpK0OmutA"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import numpy as np\n",
+ "\n",
+ "# Let's get the mean and standard deviation.\n",
+ "data = np.array(dataset['train']['inputs'], np.float32)\n",
+ "mean = data.mean(axis=(0, 1, 2))[None, None, None, :]\n",
+ "std = data.std(axis=(0, 1, 2))[None, None, None, :]\n",
+ "\n",
+ "print(f\"mean: {mean.shape}\")\n",
+ "print(f\"std: {std.shape}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "YkOwsJBuYIHg",
+ "outputId": "4a3ed264-5dca-4169-bee9-9e4ecaea5409"
+ },
+ "id": "YkOwsJBuYIHg",
+ "execution_count": 18,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "mean: (1, 1, 1, 52)\n",
+ "std: (1, 1, 1, 52)\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Let's see how the normalization works for a sample of an example's inputs."
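> As an aside, a Z-score layer like this only takes a few lines. The sketch below is illustrative and is not the actual `Normalization` class from [`serving/weather-model/weather/model.py`](serving/weather-model/weather/model.py); the buffer registration and the epsilon guard are assumptions about how such a layer is typically written.

```python
# Illustrative sketch of a Z-score normalization layer (assumed structure,
# not the actual Normalization class from weather/model.py).
import torch

class ZScoreNormalization(torch.nn.Module):  # hypothetical name
    def __init__(self, mean, std, epsilon: float = 1e-8):
        super().__init__()
        # Buffers are saved/loaded with the model but are never trained.
        self.register_buffer("mean", torch.as_tensor(mean, dtype=torch.float32))
        self.register_buffer("std", torch.as_tensor(std, dtype=torch.float32))
        self.epsilon = epsilon  # guards against division by zero for constant bands

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # The (1, 1, 1, num_inputs) statistics broadcast over
        # (batch, width, height, num_inputs) inputs.
        return (x - self.mean) / (self.std + self.epsilon)
```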
+ ],
+ "metadata": {
+ "id": "meHaHpxW-zt5"
+ },
+ "id": "meHaHpxW-zt5"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "import torch\n",
+ "\n",
+ "from weather.model import Normalization\n",
+ "\n",
+ "normalization = Normalization(mean, std)\n",
+ "\n",
+ "sample = lambda x: x[0, 0, 0, 10:15].detach().numpy()\n",
+ "\n",
+ "print(f\"mean: {sample(normalization.mean)}\")\n",
+ "print(f\"std: {sample(normalization.std)}\")\n",
+ "print('-' * 40)\n",
+ "\n",
+ "example = dataset['train'][0]\n",
+ "example_inputs = torch.as_tensor([example['inputs']])\n",
+ "normalized_inputs = normalization(example_inputs)\n",
+ "print(f\"inputs: {sample(example_inputs)}\")\n",
+ "print(f\"normalized: {sample(normalized_inputs)}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "EUT8fowo-_Bv",
+ "outputId": "5fc2a64b-7dde-4803-9d21-34264bcf93f5"
+ },
+ "id": "EUT8fowo-_Bv",
+ "execution_count": 19,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "mean: [2202.3132 2355.514 2328.052 2470.9158 2687.0806]\n",
+ "std: [256.82922 324.5936 332.1437 480.68338 351.21927]\n",
+ "----------------------------------------\n",
+ "inputs: [2295. 2514. 2534. 2774. 2957.]\n",
+ "normalized: [0.36088872 0.48826003 0.6200569 0.6305278 0.76852113]\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "After applying the `Normalization` layer, we get small numbers much closer to the range between -1 and 1; they don't have to be _exactly_ within that range, just close enough.\n",
+ "\n",
+ "Another thing to note is that our data is in a channels-last format, like `(width, height, channels)`.\n",
+ "But PyTorch expects channels-first format in the convolutional layers, like `(channels, width, height)`.\n",
+ "We still want to pass our inputs in a channels-last format and want the predictions back as channels-last for convenience, but we must convert them to channels-first for PyTorch convolutional layers to work.\n",
+ "\n",
+ "In [`weather/model.py`](serving/weather-model/weather/model.py) we define the `MoveDim` layer, which works similarly to [`torch.movedim`](https://pytorch.org/docs/stable/generated/torch.movedim.html) so the model can move the channels dimension as needed.\n"
+ ],
+ "metadata": {
+ "id": "Idvef7Id49vE"
+ },
+ "id": "Idvef7Id49vE"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from weather.model import MoveDim\n",
+ "\n",
+ "# We move the channels/last dimension (-1) to the second index (1),\n",
+ "# since the first (0) is for the batch dimension.\n",
+ "to_channels_first = MoveDim(-1, 1)\n",
+ "channels_first = to_channels_first(normalized_inputs)\n",
+ "\n",
+ "print(f\"normalized: {normalized_inputs.shape}\")\n",
+ "print(f\"channels-first: {channels_first.shape}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "AkrmnehOCuol",
+ "outputId": "0a41f8d0-8f61-4946-92e1-67e33e3eddf1"
+ },
+ "id": "AkrmnehOCuol",
+ "execution_count": 20,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "normalized: torch.Size([1, 5, 5, 52])\n",
+ "channels-first: torch.Size([1, 52, 5, 5])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "The model then passes the data through a\n",
+ "[2D Convolutional layer](https://pytorch.org/docs/stable/generated/torch.nn.Conv2d.html) for downsampling, and then through a\n",
+ "[2D DeConvolutional layer](https://pytorch.org/docs/stable/generated/torch.nn.ConvTranspose2d.html) for upsampling, so we end up with images
the same size as the input image.\n",
+ "We used a [`ReLU`](https://pytorch.org/docs/stable/generated/torch.nn.ReLU.html) activation function between all hidden layers since it's typically a good general-purpose activation function.\n",
+ "\n",
+ "The Conv2D and DeConv2D layers form a very simple Fully Convolutional Network architecture, and since we're using the same _kernel size_ for both, we get the same `(width, height)` back: a 3x3 convolution with no padding shrinks a 5x5 patch to 3x3, and the matching 3x3 transposed convolution grows it back to 5x5."
+ ],
+ "metadata": {
+ "id": "6JpbxntkEEtv"
+ },
+ "id": "6JpbxntkEEtv"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "num_inputs = 52\n",
+ "num_hidden1 = 64\n",
+ "num_hidden2 = 128\n",
+ "kernel_size = (3, 3)\n",
+ "\n",
+ "fully_convolutional_layers = torch.nn.Sequential(\n",
+ " torch.nn.Conv2d(num_inputs, num_hidden1, kernel_size),\n",
+ " torch.nn.ReLU(),\n",
+ " torch.nn.ConvTranspose2d(num_hidden1, num_hidden2, kernel_size),\n",
+ " torch.nn.ReLU(),\n",
+ ")\n",
+ "\n",
+ "fcn_outputs = fully_convolutional_layers(channels_first)\n",
+ "print(f\"FCN outputs: {fcn_outputs.shape}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3Ima73TIEG1z",
+ "outputId": "4929396e-74c2-4cd3-9fdf-31e12117f064"
+ },
+ "id": "3Ima73TIEG1z",
+ "execution_count": 21,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "FCN outputs: torch.Size([1, 128, 5, 5])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "Now, let's convert the results back into channels-last format with `MoveDim`."
+ ],
+ "metadata": {
+ "id": "TkRDEANqFoLd"
+ },
+ "id": "TkRDEANqFoLd"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "to_channels_last = MoveDim(1, -1)\n",
+ "channels_last = to_channels_last(fcn_outputs)\n",
+ "\n",
+ "print(f\"channels-last: {channels_last.shape}\")"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-oAMnWtfFuzr",
+ "outputId": "7a436c24-4ab0-4040-dd6f-cbbf167d03ff"
+ },
+ "id": "-oAMnWtfFuzr",
+ "execution_count": 22,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "channels-last: torch.Size([1, 5, 5, 128])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "For the last layer, we use a [`Linear`](https://pytorch.org/docs/stable/generated/torch.nn.Linear.html) layer with the number of outputs we want.\n",
+ "Since we can't have negative precipitation, we pass the model's outputs through a final `ReLU` activation function."
+ ],
+ "metadata": {
+ "id": "7x_OkkNvGabm"
+ },
+ "id": "7x_OkkNvGabm"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "num_outputs = 2\n",
+ "\n",
+ "linear = torch.nn.Linear(num_hidden2, num_outputs)\n",
+ "relu = torch.nn.ReLU()\n",
+ "\n",
+ "with torch.no_grad():\n",
+ " raw_predictions = linear(channels_last)\n",
+ " predictions = relu(raw_predictions)\n",
+ "\n",
+ "print(f\"predictions: {predictions.shape}\")\n",
+ "print(predictions[0, 0, 0])"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "q0T8CpEaGJew",
+ "outputId": "227fe92d-2f86-4160-aad9-971d08032a51"
+ },
+ "id": "q0T8CpEaGJew",
+ "execution_count": 23,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "predictions: torch.Size([1, 5, 5, 2])\n",
+ "tensor([0.0650, 0.0010])\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "CDQIGsp24EX9"
+ },
+ "source": [
+ "In [`weather/model.py`](serving/weather-model/weather/model.py) we defined the `WeatherModel` and `WeatherConfig` classes.\n",
+ "\n",
+ "The `WeatherModel` class inherits from [`PreTrainedModel`](https://huggingface.co/docs/transformers/main/en/main_classes/model) to make it compatible with [🤗 Transformers](https://huggingface.co/docs/transformers/main/en/index).\n",
+ "\n",
+ "The model definition includes the loss function, so the model knows how good or bad its predictions are.\n",
+ "We could use any regression loss function like [Mean Absolute Error (L1)](https://pytorch.org/docs/stable/generated/torch.nn.L1Loss.html) or [Mean Squared Error (L2)](https://pytorch.org/docs/stable/generated/torch.nn.MSELoss.html).\n",
+ "PyTorch provides a [Smooth L1 Loss](https://pytorch.org/docs/stable/generated/torch.nn.SmoothL1Loss.html), which behaves like L2 for small errors and like L1 for large ones.\n",
+ "It's less sensitive to outliers, so we'll use that.\n",
+ "\n",
+ "To create a `WeatherModel`, we have to pass it a `WeatherConfig`.\n",
+ "The `WeatherConfig` contains all the model's hyperparameters, and we must also pass the _mean_ and _standard deviation_ from the training dataset for the normalization layer.\n",
+ "We defined `WeatherModel.create`, which takes in the training dataset inputs and returns a `WeatherModel` with the right `WeatherConfig`."
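> To see why Smooth L1 is less sensitive to outliers, here is a small illustrative comparison of the three losses on the same errors; it is not part of the sample code.

```python
# Illustrative comparison of L1, L2, and Smooth L1 on a small error vs. an outlier.
import torch

predictions = torch.tensor([0.1, 5.0])  # one small error, one outlier
targets = torch.zeros(2)

for name, loss_fn in [
    ("L1", torch.nn.L1Loss(reduction="none")),
    ("L2", torch.nn.MSELoss(reduction="none")),
    ("SmoothL1", torch.nn.SmoothL1Loss(reduction="none")),
]:
    print(name, loss_fn(predictions, targets))

# Smooth L1 is quadratic (like L2) for errors below beta (1.0 by default)
# and linear (like L1) beyond it, so outliers don't dominate the gradient.
```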
+ ], + "id": "CDQIGsp24EX9" + }, + { + "cell_type": "code", + "source": [ + "from weather.model import WeatherModel\n", + "\n", + "model = WeatherModel.create(dataset['train']['inputs'])\n", + "print(model)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "h0bzkGqwo-Ic", + "outputId": "4a0f8622-e3fd-49cf-9eb2-c9e2777174b0" + }, + "id": "h0bzkGqwo-Ic", + "execution_count": 27, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "WeatherModel(\n", + " (layers): Sequential(\n", + " (0): Normalization()\n", + " (1): MoveDim()\n", + " (2): Conv2d(52, 64, kernel_size=(3, 3), stride=(1, 1))\n", + " (3): ReLU()\n", + " (4): ConvTranspose2d(64, 128, kernel_size=(3, 3), stride=(1, 1))\n", + " (5): ReLU()\n", + " (6): MoveDim()\n", + " (7): Linear(in_features=128, out_features=2, bias=True)\n", + " (8): ReLU()\n", + " )\n", + ")\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The model outputs a `{'loss': torch.Tensor, 'logits': torch.Tensor}` dictionary during training, and a `{'logits': torch.Tensor}` dictionary during predictions.\n", + "This is what 🤗 Transformers expect for [model outputs](https://huggingface.co/docs/transformers/main/en/main_classes/output).\n", + "\n", + "Remember that we _must_ pass a _batch_ of inputs to the model, not a single input." + ], + "metadata": { + "id": "6iS60sGCJczT" + }, + "id": "6iS60sGCJczT" + }, + { + "cell_type": "code", + "source": [ + "example = dataset['test']\n", + "inputs_batch = torch.as_tensor(example['inputs'][:1])\n", + "labels_batch = torch.as_tensor(example['labels'][:1])\n", + "\n", + "# We pass the labels as well to get the loss, but it's optional.\n", + "# If we don't pass the labels, we simply won't get the loss.\n", + "# The predictions are under the 'logits' key.\n", + "with torch.no_grad():\n", + " predictions = model(inputs_batch, labels_batch)\n", + "\n", + "print(f\"inputs: {inputs_batch.shape}\")\n", + "print(f\"labels: {labels_batch.shape}\")\n", + "print(f\"loss: {predictions['loss']}\")\n", + "print(f\"predictions: {predictions['logits'].shape}\")\n", + "print(\"-\" * 40)\n", + "print(f\"sample labels: {labels_batch[0, 0, 0]}\")\n", + "print(f\"sample predictions: {predictions['logits'][0, 0, 0]}\")" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "gKwdukOzJeNA", + "outputId": "63287630-c21b-4b21-c6b0-168423fd2746" + }, + "id": "gKwdukOzJeNA", + "execution_count": 30, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "inputs: torch.Size([1, 5, 5, 52])\n", + "labels: torch.Size([1, 5, 5, 2])\n", + "loss: 0.009296745993196964\n", + "predictions: torch.Size([1, 5, 5, 2])\n", + "----------------------------------------\n", + "sample labels: tensor([0., 0.])\n", + "sample predictions: tensor([0.0797, 0.0000])\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "These predictions don't look great because we haven't trained our model.\n", + "Fortunately, since we've made our model compatible with 🤗 Transformers, we can simply use [`Trainer`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer), which takes care of all the training steps, automatically optimizes the whole process, and uses accelerators like GPUs if available." 
+ ],
+ "metadata": {
+ "id": "cxyoRnNlzsYu"
+ },
+ "id": "cxyoRnNlzsYu"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "## 👟 Train the model\n",
+ "\n",
+ "We have to define the number of times we want the model to go through the training dataset; this is called the number of _epochs_.\n",
+ "We also have to define the _batch size_ to use during training and testing. This can have a big impact on how fast the model trains; as a rule of thumb, the larger the better, as long as it fits into memory.\n",
+ "We define all these parameters with [`TrainingArguments`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.TrainingArguments).\n",
+ "\n",
+ "Then we pass the model, the `TrainingArguments`, and the training and testing datasets into the `Trainer`.\n",
+ "Finally, we can train the model with [`Trainer.train`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.train)."
+ ],
+ "metadata": {
+ "id": "xG6PnXhfLzxO"
+ },
+ "id": "xG6PnXhfLzxO"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "from transformers import TrainingArguments, Trainer\n",
+ "\n",
+ "epochs = 5\n",
+ "batch_size = 512\n",
+ "\n",
+ "# Define our training job.\n",
+ "training_args = TrainingArguments(\n",
+ " output_dir=\"checkpoints\",\n",
+ " per_device_train_batch_size=batch_size,\n",
+ " per_device_eval_batch_size=batch_size,\n",
+ " num_train_epochs=epochs,\n",
+ " logging_strategy=\"epoch\",\n",
+ " evaluation_strategy=\"epoch\",\n",
+ ")\n",
+ "trainer = Trainer(\n",
+ " model,\n",
+ " training_args,\n",
+ " train_dataset=dataset[\"train\"],\n",
+ " eval_dataset=dataset[\"test\"],\n",
+ ")\n",
+ "\n",
+ "# Run the training job.\n",
+ "trainer.train()"
+ ],
+ "metadata": {
+ "id": "x4ta1oIsMveF",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 825
+ },
+ "outputId": "3a4e8674-dfa2-4cbf-8445-c3bcccfe4769"
+ },
+ "id": "x4ta1oIsMveF",
+ "execution_count": 31,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stderr",
+ "text": [
+ "PyTorch: setting up devices\n",
+ "The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).\n",
+ "***** Running training *****\n",
+ " Num examples = 3069\n",
+ " Num Epochs = 5\n",
+ " Instantaneous batch size per device = 512\n",
+ " Total train batch size (w. parallel, distributed & accumulation) = 512\n",
+ " Gradient Accumulation steps = 1\n",
+ " Total optimization steps = 30\n",
+ " Number of trainable parameters = 104234\n",
+ "Could not estimate the number of tokens of the input, floating-point operations will not be computed\n"
+ ]
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "text/plain": [
+ ""
+ ],
+ "text/html": [
+ "\n",
+ "
\n", + " \n", + " \n", + " [30/30 00:23, Epoch 5/5]\n", + "
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
EpochTraining LossValidation Loss
11.2889001.016647
21.2793001.009680
31.2717001.004657
41.2667001.001499
51.2636001.000306

" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "***** Running Evaluation *****\n", + " Num examples = 341\n", + " Batch size = 512\n", + "***** Running Evaluation *****\n", + " Num examples = 341\n", + " Batch size = 512\n", + "***** Running Evaluation *****\n", + " Num examples = 341\n", + " Batch size = 512\n", + "***** Running Evaluation *****\n", + " Num examples = 341\n", + " Batch size = 512\n", + "***** Running Evaluation *****\n", + " Num examples = 341\n", + " Batch size = 512\n", + "\n", + "\n", + "Training completed. Do not forget to share your model on huggingface.co/models =)\n", + "\n", + "\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "TrainOutput(global_step=30, training_loss=1.2740394274393718, metrics={'train_runtime': 23.7216, 'train_samples_per_second': 646.878, 'train_steps_per_second': 1.265, 'total_flos': 0.0, 'train_loss': 1.2740394274393718, 'epoch': 5.0})" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "> 💡 Both losses should go down every epoch, and they should be roughly similar.\n", + "> If the training loss goes down, but the testing loss stays flat or goes up, it might be a sign that the model is [overfitting](https://developers.google.com/machine-learning/crash-course/generalization/peril-of-overfitting), meaning that it's memorizing the training dataset instead of learning to generalize." + ], + "metadata": { + "id": "jPFCmhruOvjB" + }, + "id": "jPFCmhruOvjB" + }, + { + "cell_type": "markdown", + "source": [ + "## 💾 Save and load the model\n", + "\n", + "After the model has finished training, we can save it with [`Trainer.save_model`](https://huggingface.co/docs/transformers/main/en/main_classes/trainer#transformers.Trainer.save_model).\n", + "\n" + ], + "metadata": { + "id": "_AxB_p2-z4UH" + }, + "id": "_AxB_p2-z4UH" + }, + { + "cell_type": "code", + "source": [ + "trainer.save_model(\"model\")\n", + "\n", + "!ls -lh model" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "NPLnvRydOik0", + "outputId": "c788cf33-ec67-4612-9f44-1262dc872625" + }, + "id": "NPLnvRydOik0", + "execution_count": 32, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "Saving model checkpoint to model\n", + "Configuration saved in model/config.json\n", + "Model weights saved in model/pytorch_model.bin\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "total 420K\n", + "-rw-r--r-- 1 root root 3.4K Jan 11 21:33 config.json\n", + "-rw-r--r-- 1 root root 410K Jan 11 21:33 pytorch_model.bin\n", + "-rw-r--r-- 1 root root 3.4K Jan 11 21:33 training_args.bin\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "icxhkboQA_o5" + }, + "source": [ + "Now that we have a trained model, we can save it and load it anywhere else.\n", + "We can load a 🤗 Transformers model with [`PreTrainedModel.from_pretrained`](https://huggingface.co/docs/transformers/main/en/main_classes/model#transformers.PreTrainedModel.from_pretrained), in our case with `WeatherModel.from_pretrained`.\n", + "This loads all the model's hyperparameters as well as the _mean_ and _standard deviation_ for the normalization layer." 
+ ], + "id": "icxhkboQA_o5" + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "xsxX2Mb-CwWk", + "outputId": "4bc42359-88c8-4803-be4c-34cd9e07a8e1" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "loading configuration file model/config.json\n", + "Model config WeatherConfig {\n", + " \"architectures\": [\n", + " \"WeatherModel\"\n", + " ],\n", + " \"kernel_size\": [\n", + " 3,\n", + " 3\n", + " ],\n", + " \"mean\": [\n", + " [\n", + " [\n", + " [\n", + " 0.965579092502594,\n", + " 2.3415911197662354,\n", + " 6.150100231170654,\n", + " 476.72564697265625,\n", + " 421.8377380371094,\n", + " 521.5245971679688,\n", + " 109.100830078125,\n", + " 300.76141357421875,\n", + " 262.6136474609375,\n", + " 5461.68310546875,\n", + " 2202.313232421875,\n", + " 2355.513916015625,\n", + " 2328.052001953125,\n", + " 2470.915771484375,\n", + " 2687.08056640625,\n", + " 2737.617919921875,\n", + " 2684.49365234375,\n", + " 2650.0927734375,\n", + " 2816.9892578125,\n", + " 509.75927734375,\n", + " 451.73077392578125,\n", + " 535.8512573242188,\n", + " 140.81637573242188,\n", + " 276.422607421875,\n", + " 257.4959411621094,\n", + " 4964.77197265625,\n", + " 2143.988037109375,\n", + " 2276.671630859375,\n", + " 2243.602783203125,\n", + " 2340.478759765625,\n", + " 2601.414794921875,\n", + " 2623.432373046875,\n", + " 2567.951904296875,\n", + " 2536.750732421875,\n", + " 2718.31591796875,\n", + " 601.8285522460938,\n", + " 540.8607788085938,\n", + " 601.079345703125,\n", + " 250.1461639404297,\n", + " 271.73126220703125,\n", + " 291.7319641113281,\n", + " 4314.62744140625,\n", + " 2050.633544921875,\n", + " 2152.40283203125,\n", + " 2113.24267578125,\n", + " 2147.13232421875,\n", + " 2477.33935546875,\n", + " 2455.325927734375,\n", + " 2397.76416015625,\n", + " 2371.694091796875,\n", + " 2573.267578125,\n", + " 1774.619873046875\n", + " ]\n", + " ]\n", + " ]\n", + " ],\n", + " \"model_type\": \"weather\",\n", + " \"num_hidden1\": 64,\n", + " \"num_hidden2\": 128,\n", + " \"num_inputs\": 52,\n", + " \"num_outputs\": 2,\n", + " \"std\": [\n", + " [\n", + " [\n", + " [\n", + " 3.4099764823913574,\n", + " 5.503620147705078,\n", + " 8.816463470458984,\n", + " 652.4397583007812,\n", + " 596.1942138671875,\n", + " 676.47119140625,\n", + " 253.98402404785156,\n", + " 383.220947265625,\n", + " 336.3753967285156,\n", + " 1895.1807861328125,\n", + " 256.8292236328125,\n", + " 324.5935974121094,\n", + " 332.1437072753906,\n", + " 480.6833801269531,\n", + " 351.2192687988281,\n", + " 423.1502685546875,\n", + " 439.25201416015625,\n", + " 433.09442138671875,\n", + " 380.0411376953125,\n", + " 719.5698852539062,\n", + " 652.4895629882812,\n", + " 723.5523071289062,\n", + " 309.5145568847656,\n", + " 352.95697021484375,\n", + " 330.34222412109375,\n", + " 2052.62060546875,\n", + " 294.1995849609375,\n", + " 371.49407958984375,\n", + " 375.6823425292969,\n", + " 520.1500244140625,\n", + " 360.0391540527344,\n", + " 455.3539123535156,\n", + " 472.1324157714844,\n", + " 466.9129943847656,\n", + " 412.6302185058594,\n", + " 861.4622802734375,\n", + " 791.761474609375,\n", + " 841.4432983398438,\n", + " 514.6561279296875,\n", + " 439.2526550292969,\n", + " 461.8729553222656,\n", + " 2031.054931640625,\n", + " 326.0904541015625,\n", + " 409.5498962402344,\n", + " 408.24627685546875,\n", + " 533.72119140625,\n", + " 348.2250671386719,\n", + " 464.1885070800781,\n", + " 482.2993469238281,\n", + " 
480.4700622558594,\n",
+ " 428.2205505371094,\n",
+ " 1641.6630859375\n",
+ " ]\n",
+ " ]\n",
+ " ]\n",
+ " ],\n",
+ " \"torch_dtype\": \"float32\",\n",
+ " \"transformers_version\": \"4.25.1\"\n",
+ "}\n",
+ "\n",
+ "loading weights file model/pytorch_model.bin\n",
+ "All model checkpoint weights were used when initializing WeatherModel.\n",
+ "\n",
+ "All the weights of WeatherModel were initialized from the model checkpoint at model.\n",
+ "If your task is similar to the task the model of the checkpoint was trained on, you can already use WeatherModel for predictions without further training.\n"
+ ]
+ },
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "WeatherModel(\n",
+ " (layers): Sequential(\n",
+ " (0): Normalization()\n",
+ " (1): MoveDim()\n",
+ " (2): Conv2d(52, 64, kernel_size=(3, 3), stride=(1, 1))\n",
+ " (3): ReLU()\n",
+ " (4): ConvTranspose2d(64, 128, kernel_size=(3, 3), stride=(1, 1))\n",
+ " (5): ReLU()\n",
+ " (6): MoveDim()\n",
+ " (7): Linear(in_features=128, out_features=2, bias=True)\n",
+ " (8): ReLU()\n",
+ " )\n",
+ ")\n"
+ ]
+ }
+ ],
+ "source": [
+ "from weather.model import WeatherModel\n",
+ "\n",
+ "model = WeatherModel.from_pretrained(\"model\")\n",
+ "print(model)"
+ ],
+ "id": "xsxX2Mb-CwWk"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "IO73AYtsCIQ_"
+ },
+ "source": [
+ "# ☁️ Train the model in Vertex AI\n",
+ "\n",
+ "For this example, we're training on a very small dataset for a very small number of epochs.\n",
+ "This means we don't have a representative number of examples and the model hasn't seen the data enough times, so it won't perform very well.\n",
+ "\n",
+ "Training on larger datasets for a large number of epochs can take a lot of time, so it might be a good idea to do the training in the cloud.\n",
+ "[Vertex AI](https://cloud.google.com/vertex-ai) is a great option, and even allows us to use hardware accelerators like GPUs.\n",
+ "There are [PyTorch pre-built containers](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers#pytorch) which include PyTorch and many common libraries, so we don't need to build a custom container.\n",
+ "\n",
+ "The model and trainer are defined in the [`serving/weather-model`](serving/weather-model) module.\n",
+ "To run it in Vertex AI, we must build the package, copy it to Cloud Storage, and launch a custom training job with [`CustomPythonPackageTrainingJob`](https://cloud.google.com/python/docs/reference/aiplatform/latest/google.cloud.aiplatform.CustomPythonPackageTrainingJob)."
+ ],
+ "id": "IO73AYtsCIQ_"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Build the `weather-model` package.\n",
+ "!python -m build serving/weather-model"
+ ],
+ "metadata": {
+ "id": "v1SZt1iA2Wrh"
+ },
+ "execution_count": null,
+ "outputs": [],
+ "id": "v1SZt1iA2Wrh"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "!ls -lh serving/weather-model/dist"
+ ],
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2f2b4dc2-a287-4822-caed-7f8115246d7d",
+ "id": "y4F1_eA32Wrh"
+ },
+ "execution_count": 6,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "name": "stdout",
+ "text": [
+ "total 16K\n",
+ "-rw-r--r-- 1 root root 5.9K Jan 11 18:29 weather_model-1.0.0-py3-none-any.whl\n",
+ "-rw-r--r-- 1 root root 4.3K Jan 11 18:29 weather-model-1.0.0.tar.gz\n"
+ ]
+ }
+ ],
+ "id": "y4F1_eA32Wrh"
+ },
+ {
+ "cell_type": "code",
+ "source": [
+ "# Stage the `weather-model` package in Cloud Storage.\n",
+ "!gsutil cp serving/weather-model/dist/weather-model-1.0.0.tar.gz gs://{bucket}/weather/"
+ ],
+ "metadata": {
+ "id": "JA1k9ky02dsx"
+ },
+ "id": "JA1k9ky02dsx",
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "In Vertex AI, we can access Cloud Storage files directly as if they were local files via Cloud Storage FUSE.\n",
+ "Cloud Storage files are available under `/gcs` followed by your bucket and file path.\n",
+ "To learn more, see the [Cloud Storage as a File System in AI Training](https://cloud.google.com/blog/products/ai-machine-learning/cloud-storage-file-system-ai-training) blog post."
+ ],
+ "metadata": {
+ "id": "yk9X4YQcDPpR"
+ },
+ "id": "yk9X4YQcDPpR"
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "id": "Ny4x99GiS2Lm"
+ },
+ "outputs": [],
+ "source": [
+ "from google.cloud import aiplatform\n",
+ "\n",
+ "epochs = 100\n",
+ "\n",
+ "# Cloud Storage paths.\n",
+ "data_path = f\"/gcs/{bucket}/weather/data\"\n",
+ "model_path = f\"/gcs/{bucket}/weather/model\"\n",
+ "\n",
+ "aiplatform.init(project=project, location=location, staging_bucket=bucket)\n",
+ "\n",
+ "# Launch the custom training job.\n",
+ "job = aiplatform.CustomPythonPackageTrainingJob(\n",
+ " display_name=\"weather-forecasting\",\n",
+ " python_package_gcs_uri=f\"gs://{bucket}/weather/weather-model-1.0.0.tar.gz\",\n",
+ " python_module_name=\"weather.trainer\",\n",
+ " container_uri=\"us-docker.pkg.dev/vertex-ai/training/pytorch-gpu.1-11:latest\",\n",
+ ")\n",
+ "job.run(\n",
+ " machine_type=\"n1-highmem-8\",\n",
+ " accelerator_type=\"NVIDIA_TESLA_T4\",\n",
+ " accelerator_count=1,\n",
+ " args=[\n",
+ " f\"--data-path={data_path}\",\n",
+ " f\"--model-path={model_path}\",\n",
+ " f\"--epochs={epochs}\",\n",
+ " ],\n",
+ ")"
+ ],
+ "id": "Ny4x99GiS2Lm"
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "zw_kcyw4gOLF"
+ },
+ "source": [
+ "> 💡 Look at your Vertex AI training jobs: https://console.cloud.google.com/vertex-ai/training/custom-jobs"
+ ],
+ "id": "zw_kcyw4gOLF"
+ },
+ {
+ "cell_type": "markdown",
+ "source": [
+ "# ⛳️ What's next?\n",
+ "\n",
+ "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb):\n",
+ " Get predictions from the model with data it has never seen before."
+ ], + "metadata": { + "id": "79RnF-lYBRTS" + }, + "id": "79RnF-lYBRTS" + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-gpu.2-6.m82", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m82" + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb b/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb new file mode 100644 index 000000000000..8598a7e10f58 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb @@ -0,0 +1,917 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "g4jtzXwEvW2-" + }, + "outputs": [], + "source": [ + "#@title ###### Licensed to the Apache Software Foundation (ASF), Version 2.0 (the \"License\")\n", + "\n", + "# Licensed to the Apache Software Foundation (ASF) under one\n", + "# or more contributor license agreements. See the NOTICE file\n", + "# distributed with this work for additional information\n", + "# regarding copyright ownership. The ASF licenses this file\n", + "# to you under the Apache License, Version 2.0 (the\n", + "# \"License\"); you may not use this file except in compliance\n", + "# with the License. You may obtain a copy of the License at\n", + "#\n", + "# http://www.apache.org/licenses/LICENSE-2.0\n", + "#\n", + "# Unless required by applicable law or agreed to in writing,\n", + "# software distributed under the License is distributed on an\n", + "# \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY\n", + "# KIND, either express or implied. See the License for the\n", + "# specific language governing permissions and limitations\n", + "# under the License." 
+ ], + "id": "g4jtzXwEvW2-" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "HtysPAVSvcMg" + }, + "source": [ + "# 🌦️ Weather forecasting -- _Predictions_\n", + "\n", + "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/4-predictions.ipynb)\n", + "\n", + "This sample is broken into the following notebooks:\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧭 Overview**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/1-overview.ipynb):\n", + " Go through what we want to achieve, and explore the data we want to use as _inputs and outputs_ for our model.\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🗄️ Create the dataset**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/2-dataset.ipynb):\n", + " Use [Apache Beam](https://beam.apache.org/) to fetch data from [Earth Engine](https://earthengine.google.com/) in parallel, and create a dataset for our model in [Dataflow](https://cloud.google.com/dataflow).\n", + "\n", + "* [![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🧠 Train the model**](https://colab.research.google.com/github/GoogleCloudPlatform/python-docs-samples/blob/main/people-and-planet-ai/weather-forecasting/notebooks/3-training.ipynb):\n", + " Build a simple _Fully Convolutional Network_ in [PyTorch](https://pytorch.org/) and train it in [Vertex AI](https://cloud.google.com/vertex-ai/docs/training/custom-training) with the dataset we created.\n", + "\n", + "* ![Open in Colab](https://github.com/googlecolab/open_in_colab/raw/main/images/icon16.png) **🔮 Model predictions**:\n", + " Get predictions from the model with data it has never seen before.\n", + "\n", + "This sample leverages geospatial satellite and precipitation data from [Google Earth Engine](https://earthengine.google.com/).\n", + "Using satellite imagery, you'll build and train a model for rain \"nowcasting\", i.e. predicting the amount of rainfall for a given geospatial region and time in the immediate future.\n", + "\n", + "* ⏲️ **Time estimate**: ~15 minutes\n", + "* 💰 **Cost estimate**: [covered by Cloud Run free tier](https://cloud.google.com/run/pricing)\n", + "\n", + "💚 This is one of many **machine learning how-to samples** inspired by **real climate solutions** aired on the [People and Planet AI 🎥 series](https://www.youtube.com/playlist?list=PLIivdWyY5sqI-llB35Dcb187ZG155Rs_7)." + ], + "id": "HtysPAVSvcMg" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RuFZck60B8t-" + }, + "source": [ + "# 🎬 Before you begin\n", + "\n", + "Let's start by cloning the GitHub repository and installing some dependencies."
+ ], + "id": "RuFZck60B8t-" + }, + { + "cell_type": "code", + "source": [ + "# Now let's get the code from GitHub and navigate to the sample.\n", + "!git clone https://github.com/GoogleCloudPlatform/python-docs-samples.git\n", + "%cd python-docs-samples/people-and-planet-ai/weather-forecasting" + ], + "metadata": { + "id": "W-fPxkYD9FaP" + }, + "execution_count": null, + "outputs": [], + "id": "W-fPxkYD9FaP" + }, + { + "cell_type": "markdown", + "source": [ + "The [`weather-data`](serving/weather-data) local package contains the functions to get data from Earth Engine.\n", + "It is used both for creating the training dataset and for predictions.\n", + "\n", + "The [`weather-model`](serving/weather-model) local package contains the model definition and the training script.\n", + "This ensures we use the same model definition for both training and predictions.\n", + "\n", + "We need both of these local packages for predictions.\n" + ], + "metadata": { + "id": "r5OijZcuInAe" + }, + "id": "r5OijZcuInAe" + }, + { + "cell_type": "code", + "source": [ + "# Upgrade `setuptools` to install packages from pyproject.toml files.\n", + "!pip install --quiet --upgrade --no-warn-conflicts pip setuptools\n", + "\n", + "# We need `build` and `virtualenv` to build the local packages.\n", + "!pip install --quiet build virtualenv\n", + "\n", + "# Install the `weather-data` and `weather-model` local packages.\n", + "!pip install serving/weather-data serving/weather-model" + ], + "metadata": { + "id": "AlcsK6pd-x0I" + }, + "execution_count": null, + "outputs": [], + "id": "AlcsK6pd-x0I" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "mHvEEW6oyFGV" + }, + "source": [ + "## ☁️ My Google Cloud resources\n", + "\n", + "Make sure you have followed these steps to configure your Google Cloud project:\n", + "\n", + "1. Enable the APIs: _Earth Engine and Cloud Run_\n", + "\n", + " \n", + "\n", + "1. Create or use an existing Cloud Storage bucket.\n", + "\n", + " \n", + "\n", + "1. Register your\n", + " [Compute Engine default service account](https://console.cloud.google.com/iam-admin/iam)\n", + " on Earth Engine.\n", + "\n", + " \n", + "\n", + "Once you have everything ready, you can go ahead and fill in your Google Cloud resources in the following code cell.\n", + "Make sure you run it!"
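If you want to confirm that both local packages installed cleanly before moving on, a light check (our addition, not part of the notebook) is to look the modules up without importing them, since importing `weather.data` initializes Earth Engine and therefore needs credentials first:

```python
from importlib.util import find_spec

# Both local packages install into the shared `weather` namespace:
# weather-data provides weather.data, weather-model provides weather.model.
# find_spec avoids executing the modules (weather.data calls ee.Initialize on import).
for module in ("weather.data", "weather.model"):
    assert find_spec(module) is not None, f"Missing local package module: {module}"
```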
+ ], + "id": "mHvEEW6oyFGV" + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "YMPNUR0pyRvy" + }, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import os\n", + "from google.colab import auth\n", + "\n", + "# Please fill in these values.\n", + "project = \"\" #@param {type:\"string\"}\n", + "bucket = \"\" #@param {type:\"string\"}\n", + "location = \"us-central1\" #@param {type:\"string\"}\n", + "\n", + "# Quick input validations.\n", + "assert project, \"⚠️ Please provide a Google Cloud project ID\"\n", + "assert bucket, \"⚠️ Please provide a Cloud Storage bucket name\"\n", + "assert not bucket.startswith('gs://'), f\"⚠️ Please remove the gs:// prefix from the bucket name: {bucket}\"\n", + "assert location, \"⚠️ Please provide a Google Cloud location\"\n", + "\n", + "# Authenticate to Colab.\n", + "auth.authenticate_user()\n", + "\n", + "# Set GOOGLE_CLOUD_PROJECT for google.auth.default().\n", + "os.environ['GOOGLE_CLOUD_PROJECT'] = project\n", + "\n", + "# Set the gcloud project for other gcloud commands.\n", + "!gcloud config set project {project}" + ], + "id": "YMPNUR0pyRvy" + }, + { + "cell_type": "markdown", + "source": [ + "# 💻 Local predictions\n", + "\n", + "First, we get the input data for the model.\n", + "We get the labels as well, just to compare our model's predictions with the real precipitation." + ], + "metadata": { + "id": "-CZNz7BzbQk5" + }, + "id": "-CZNz7BzbQk5" + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": { + "id": "5ytr16jFOiYT", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4638a811-f995-47ed-c05d-f746b933c8e6" + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "inputs : float32 (128, 128, 52)\n", + "labels : float32 (128, 128, 2)\n" + ] + } + ], + "source": [ + "from datetime import datetime\n", + "from weather.data import get_inputs_patch, get_labels_patch\n", + "\n", + "date = datetime(2019, 9, 2, 18)\n", + "point = (-78.322, 25.507) # (longitude, latitude)\n", + "patch_size = 128\n", + "\n", + "inputs = get_inputs_patch(date, point, patch_size)\n", + "labels = get_labels_patch(date, point, patch_size)\n", + "\n", + "print(f\"inputs : {inputs.dtype} {inputs.shape}\")\n", + "print(f\"labels : {labels.dtype} {labels.shape}\")" + ], + "id": "5ytr16jFOiYT" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wf0KKasKfvEJ" + }, + "source": [ + "Here's what the input data looks like." + ], + "id": "wf0KKasKfvEJ" + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": { + "id": "yViw-C4SpOks", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 517 + }, + "outputId": "c610b6b3-4baa-4b2d-c689-e5610508e02a" + }, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
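Where do the 52 input channels and 2 label channels come from? They follow from the constants in `weather/data.py`: three input snapshots, each with one GPM precipitation band plus the GOES-16 `CMI_C*` bands (16 bands is our reading of the `CMI_C.*` selector), plus one elevation band; labels are one precipitation band at each of two future hours. A sanity-check sketch:

```python
from weather.data import INPUT_HOUR_DELTAS, OUTPUT_HOUR_DELTAS

GPM_BANDS = 1      # "precipitationCal"
GOES16_BANDS = 16  # assumption: "CMI_C.*" selects CMI_C01..CMI_C16
ELEVATION_BANDS = 1

num_input_channels = len(INPUT_HOUR_DELTAS) * (GPM_BANDS + GOES16_BANDS) + ELEVATION_BANDS
num_label_channels = len(OUTPUT_HOUR_DELTAS) * GPM_BANDS
print(num_input_channels, num_label_channels)  # expected: 52 2
```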

\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ], + "source": [ + "from visualize import show_inputs\n", + "\n", + "# Show the input data for the example.\n", + "show_inputs(inputs)" + ], + "id": "yViw-C4SpOks" + }, + { + "cell_type": "markdown", + "source": [ + "First, let's see how the Vertex AI model we trained performs." + ], + "metadata": { + "id": "ZJp9RSWYWF_O" + }, + "id": "ZJp9RSWYWF_O" + }, + { + "cell_type": "code", + "source": [ + "model_path_gcs = f\"gs://{bucket}/weather/model\"\n", + "\n", + "!mkdir -p model\n", + "!gsutil cp {model_path_gcs}/* model" + ], + "metadata": { + "id": "5w_uNjluhDMG" + }, + "id": "5w_uNjluhDMG", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from weather.model import WeatherModel\n", + "\n", + "model = WeatherModel.from_pretrained(\"model\")" + ], + "metadata": { + "id": "gjHinh3bEsNi" + }, + "id": "gjHinh3bEsNi", + "execution_count": 46, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from visualize import show_outputs\n", + "\n", + "predictions = model.predict(inputs.tolist())\n", + "show_outputs(predictions)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 317 + }, + "id": "wcebAn0-EpJW", + "outputId": "6960f9f4-00bd-4266-ce2c-b9b1e965bfff" + }, + "id": "wcebAn0-EpJW", + "execution_count": 47, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The results don't look too bad for the small amount of data and time it took to train.\n", + "\n", + "Now, let's see how the pre-trained model in the [`serving/model/`](serving/model/) directory performs.\n", + "We trained this model for 1,000 epochs with around 100,000 examples, so you don't have to 🙂.\n", + "That is around 800,000 training examples after data augmentation." + ], + "metadata": { + "id": "QCsTwERzWZKL" + }, + "id": "QCsTwERzWZKL" + }, + { + "cell_type": "code", + "source": [ + "from weather.model import WeatherModel\n", + "\n", + "model_path = \"serving/model\"\n", + "model = WeatherModel.from_pretrained(model_path)" + ], + "metadata": { + "id": "8qcpITinE1cZ" + }, + "id": "8qcpITinE1cZ", + "execution_count": 48, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "from visualize import show_outputs\n", + "\n", + "predictions = model.predict(inputs.tolist())\n", + "show_outputs(predictions)" + ], + "metadata": { + "id": "68rZ4GhiWt-N", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 317 + }, + "outputId": "51e390bf-cf81-4b9f-c82c-197b9daa7dda" + }, + "id": "68rZ4GhiWt-N", + "execution_count": 49, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "And for reference, this is how the ground truth looks like for this example." + ], + "metadata": { + "id": "4OI06jmXVw8Z" + }, + "id": "4OI06jmXVw8Z" + }, + { + "cell_type": "code", + "source": [ + "# Show the real ground truth for reference.\n", + "show_outputs(labels)" + ], + "metadata": { + "id": "RWnblCElbIsj", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 317 + }, + "outputId": "3eb2438f-ad65-4108-c7d9-f0101dcb32ca" + }, + "id": "RWnblCElbIsj", + "execution_count": 50, + "outputs": [ + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "# ☁️ Cloud Run predictions\n", + "\n", + "[Cloud Run](https://cloud.google.com/run) allows us to deploy\n", + "[_serverless_](https://en.wikipedia.org/wiki/Serverless_computing)\n", + "web services with a\n", + "[REST API](https://en.wikipedia.org/wiki/Representational_state_transfer).\n", + "Cloud Run autoscales from zero workers when there are no requests, to enough workers to handle high traffic of requests.\n", + "It's a great and efficient option to host a predictions server without having to have servers up and running all the time.\n", + "\n", + "Cloud Run autoscales from zero workers when there are no requests, to enough workers to handle high traffic of requests.\n", + "It's a great and efficient option to host a predictions server without having to have servers up and running all the time.\n", + "\n", + "We deploy our service to Cloud Run\n", + "[directly from source code](https://cloud.google.com/run/docs/deploying-source-code)\n", + "so we don't need to build the container image first.\n", + "Behind the scenes, this command uses Google Cloud [Buildpacks](https://buildpacks.io/) and\n", + "[Cloud Build](https://cloud.google.com/build)\n", + "to automatically build a container image from our source code in the [`serving`](serving) directory." + ], + "metadata": { + "id": "WK76ae88bVpf" + }, + "id": "WK76ae88bVpf" + }, + { + "cell_type": "code", + "source": [ + "service_name = \"weather-forecasting\"\n", + "\n", + "!gcloud run deploy {service_name} \\\n", + " --source=\"serving/\" \\\n", + " --region=\"{location}\" \\\n", + " --memory=\"1G\" \\\n", + " --no-allow-unauthenticated" + ], + "metadata": { + "id": "OJaNgKPob1M7" + }, + "id": "OJaNgKPob1M7", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "> 💡 Look at your Cloud Run services: https://console.cloud.google.com/run\n", + "\n", + "When the Cloud Run service is deployed, you'll see some information including the Service URL.\n", + "\n", + "We can also get the URL from the Cloud Console, or with [`gcloud run services describe`](https://cloud.google.com/sdk/gcloud/reference/run/services/describe)." 
+ ], + "metadata": { + "id": "zACgCOqgUIw5" + }, + "id": "zACgCOqgUIw5" + }, + { + "cell_type": "code", + "source": [ + "output = !gcloud run services describe \"{service_name}\" --region=\"{location}\" --format=\"get(status.url)\"\n", + "model_url = output[0]\n", + "print(f\"model_url: {model_url}\")" + ], + "metadata": { + "id": "Q7WsNPyCIYsD" + }, + "id": "Q7WsNPyCIYsD", + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "Since we only accept authorized calls in our web service, we also need to authenticate each call.\n", + "\n", + "Colab notebooks aren't tied to a Service Account, so we can't get the identity token here.\n", + "\n", + "Run this command in your **local terminal** or in [**Cloud Shell**](https://shell.cloud.google.com/?show=terminal), and then copy-paste your **🔑 Identity token** from the output of the command into the following variable and run that cell.\n", + "\n", + "```sh\n", + "# Run in a terminal and copy-paste the output.\n", + "gcloud auth print-identity-token\n", + "```" + ], + "metadata": { + "id": "bt4iuVwpUOOk" + }, + "id": "bt4iuVwpUOOk" + }, + { + "cell_type": "code", + "source": [ + "identity_token = \"\" #@param {type:\"string\"}" + ], + "metadata": { + "cellView": "form", + "id": "a4ImFWv9USV0" + }, + "id": "a4ImFWv9USV0", + "execution_count": 17, + "outputs": [] + }, + { + "cell_type": "markdown", + "source": [ + "> 💡 Your identity token changes over time, so you might need to generate a new one if you're getting authentication errors.\n", + "\n", + "> 💡 To learn more about how to authenticate to Cloud Run, see the\n", + "> [Authentication overview](https://cloud.google.com/run/docs/authenticating/overview) page.\n", + "\n", + "Now let's get some predictions from our Cloud Run service.\n", + "The service first fetches the input data from Earth Engine, runs the model, and returns the predictions as JSON."
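Outside Colab, in environments with ambient service-account credentials (Compute Engine, Cloud Run, or a local `GOOGLE_APPLICATION_CREDENTIALS` key file), you could mint the identity token in code instead of pasting it; a sketch using `google-auth`:

```python
import google.auth.transport.requests
from google.oauth2 import id_token

# Mint an ID token with the Cloud Run service URL as the audience.
# Requires service-account credentials; it won't work with Colab user auth.
auth_request = google.auth.transport.requests.Request()
identity_token = id_token.fetch_id_token(auth_request, audience=model_url)
```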
+ ], + "metadata": { + "id": "05meYYX9UZC3" + }, + "id": "05meYYX9UZC3" + }, + { + "cell_type": "code", + "source": [ + "import numpy as np\n", + "import requests\n", + "\n", + "response = requests.get(\n", + " f\"{model_url}/predict/2019-09-02T18:00/25.507,-78.322\",\n", + " headers={\"Authorization\": f\"Bearer {identity_token}\"},\n", + " params={\"patch-size\": 256, \"include-inputs\": True},\n", + ")\n", + "\n", + "response.raise_for_status()\n", + "results = response.json()\n", + "\n", + "inputs = np.array(results['inputs'], np.float32)\n", + "predictions = np.array(results['predictions'], np.uint8)\n", + "print(f\"inputs : {inputs.dtype.name} {inputs.shape}\")\n", + "print(f\"predictions : {predictions.dtype.name} {predictions.shape}\")" + ], + "metadata": { + "id": "Y6Tx6x9wUbMG", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "7ede7472-d884-4388-e629-c3c860fc5200" + }, + "id": "Y6Tx6x9wUbMG", + "execution_count": 57, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "inputs : float32 (256, 256, 52)\n", + "predictions : uint8 (256, 256, 2)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "from visualize import show_inputs, show_outputs\n", + "\n", + "print(\"+\" + \"-\" * 80)\n", + "print(\"| Inputs\")\n", + "show_inputs(inputs)\n", + "print(\"+\" + \"-\" * 80)\n", + "print(\"| Predictions\")\n", + "show_outputs(predictions)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 886 + }, + "id": "JksIvcATNK6T", + "outputId": "dfca4866-3444-4d19-b832-fb6a58841a38" + }, + "id": "JksIvcATNK6T", + "execution_count": 58, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------\n", + "| Inputs\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "+--------------------------------------------------------------------------------\n", + "| Predictions\n" + ] + }, + { + "output_type": "display_data", + "data": { + "text/html": [ + "\n", + "\n", + "\n", + "
\n", + "
\n", + "\n", + "" + ] + }, + "metadata": {} + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "> 💡 To learn about more options for predictions in Cloud, take a look at the [🌍 Land cover classification](../land-cover-classification) sample." + ], + "metadata": { + "id": "Qi6_DWuEodrK" + }, + "id": "Qi6_DWuEodrK" + }, + { + "cell_type": "markdown", + "metadata": { + "id": "2hIMtf7JCiMi" + }, + "source": [ + "# ⛵ Further exploration\n", + "\n", + "This notebook demonstrated a simple model to start exploring the problem of weather forecasting using deep neural networks. The model has less than 100k parameters and only a few Conv2D layers to keep training time short. Even so, the model is able to distinguish cloud patterns for broad rain vs no rain detection.\n", + "\n", + "There has been a lot of interesting research work on weather nowcasting recently, especially with [U-Net](https://en.wikipedia.org/wiki/U-Net) style model architectures. If you are interested in diving deeper, here are some articles from Google Research:\n", + "\n", + "* [Google Research blog on nowcasting](https://ai.googleblog.com/2021/11/metnet-2-deep-learning-for-12-hour.html)\n", + "* [MetNet paper](https://arxiv.org/abs/2003.12140)" + ], + "id": "2hIMtf7JCiMi" + } + ], + "metadata": { + "colab": { + "provenance": [], + "toc_visible": true + }, + "environment": { + "kernel": "python3", + "name": "tf2-gpu.2-6.m82", + "type": "gcloud", + "uri": "gcr.io/deeplearning-platform-release/tf2-gpu.2-6:m82" + }, + "gpuClass": "standard", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.8" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/people-and-planet-ai/weather-forecasting/serving/.gcloudignore b/people-and-planet-ai/weather-forecasting/serving/.gcloudignore new file mode 100644 index 000000000000..260aaf54eccb --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/.gcloudignore @@ -0,0 +1,4 @@ +**/*.egg-info +**/__pycache__ +**/build +**/dist diff --git a/people-and-planet-ai/weather-forecasting/serving/Dockerfile.backup b/people-and-planet-ai/weather-forecasting/serving/Dockerfile.backup new file mode 100644 index 000000000000..2ee37566e488 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/Dockerfile.backup @@ -0,0 +1,24 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM python:slim + +WORKDIR /app +COPY . 
./ + +RUN pip install --no-cache-dir --upgrade pip && \ + pip install --no-cache-dir -r requirements.txt && \ + pip check + +CMD exec gunicorn --bind :$PORT main:app --threads=8 --timeout=0 diff --git a/people-and-planet-ai/weather-forecasting/serving/Procfile b/people-and-planet-ai/weather-forecasting/serving/Procfile new file mode 100644 index 000000000000..baf3673c3d4b --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/Procfile @@ -0,0 +1 @@ +web: gunicorn --bind :$PORT main:app --threads=8 --timeout=0 diff --git a/people-and-planet-ai/weather-forecasting/serving/main.py b/people-and-planet-ai/weather-forecasting/serving/main.py new file mode 100644 index 000000000000..4a1b1099a32d --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/main.py @@ -0,0 +1,58 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from datetime import datetime +import os + +from flask import Flask, request + +from weather.data import get_inputs_patch +from weather.model import WeatherModel + +app = Flask(__name__) + +MODEL = WeatherModel.from_pretrained("model") + + +def to_bool(x: str) -> bool: + return x.lower() == "true" + + +@app.route("/") +def ping() -> dict: + """Checks that we can communicate with the service and get arguments.""" + return { + "response": "✅ I got your request!", + "args": request.args, + } + + +@app.route("/predict/<iso_date>/<float(signed=True):lat>,<float(signed=True):lon>") +def predict(iso_date: str, lat: float, lon: float) -> dict: + # Optional HTTP request parameters. 
+ # https://en.wikipedia.org/wiki/Query_string + patch_size = request.args.get("patch-size", 128, type=int) + include_inputs = request.args.get("include-inputs", False, type=to_bool) + + date = datetime.fromisoformat(iso_date) + inputs = get_inputs_patch(date, (lon, lat), patch_size).tolist() + predictions = MODEL.predict(inputs).tolist() + + if include_inputs: + return {"inputs": inputs, "predictions": predictions} + return {"predictions": predictions} + + +if __name__ == "__main__": + app.run(debug=True, host="0.0.0.0", port=int(os.environ.get("PORT", 8080))) diff --git a/people-and-planet-ai/weather-forecasting/serving/model/config.json b/people-and-planet-ai/weather-forecasting/serving/model/config.json new file mode 100644 index 000000000000..b3f8e036a8f6 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/model/config.json @@ -0,0 +1,136 @@ +{ + "architectures": [ + "WeatherModel" + ], + "kernel_size": [ + 3, + 3 + ], + "mean": [ + [ + [ + [ + 1.038480520248413, + 2.3580005168914795, + 6.121784210205078, + 461.6368103027344, + 406.2836608886719, + 502.3795166015625, + 109.16022491455078, + 285.9358825683594, + 252.71353149414062, + 5385.9697265625, + 2188.639892578125, + 2338.22216796875, + 2311.983642578125, + 2451.591064453125, + 2668.61279296875, + 2717.432861328125, + 2663.63134765625, + 2630.04052734375, + 2795.269287109375, + 495.73779296875, + 440.49481201171875, + 518.4873046875, + 144.8490753173828, + 264.603515625, + 251.7928924560547, + 4901.87060546875, + 2130.96533203125, + 2260.6865234375, + 2228.587646484375, + 2319.893310546875, + 2580.951416015625, + 2602.4677734375, + 2546.56591796875, + 2516.347412109375, + 2698.1044921875, + 555.55859375, + 498.544677734375, + 555.2677001953125, + 215.42405700683594, + 237.53317260742188, + 257.0519104003906, + 4218.6064453125, + 2042.2440185546875, + 2141.38671875, + 2101.939208984375, + 2129.681640625, + 2455.726318359375, + 2437.5830078125, + 2379.735107421875, + 2354.409423828125, + 2557.901611328125, + 1722.7576904296875 + ] + ] + ] + ], + "model_type": "weather", + "num_hidden1": 64, + "num_hidden2": 128, + "num_inputs": 52, + "num_outputs": 2, + "std": [ + [ + [ + [ + 3.4689180850982666, + 5.461374759674072, + 8.733540534973145, + 639.7587280273438, + 586.5775146484375, + 665.25, + 249.64366149902344, + 364.31646728515625, + 327.7474670410156, + 1921.8106689453125, + 265.7781066894531, + 332.87744140625, + 339.4986572265625, + 486.11968994140625, + 358.6761779785156, + 429.5836486816406, + 445.0389099121094, + 438.9586486816406, + 390.2461853027344, + 719.083251953125, + 655.0225219726562, + 719.3421020507812, + 309.92315673828125, + 349.9136657714844, + 336.32281494140625, + 2015.7835693359375, + 295.84429931640625, + 371.2146911621094, + 375.3916015625, + 515.28662109375, + 361.04150390625, + 453.0198974609375, + 469.2937927246094, + 464.7489929199219, + 415.37939453125, + 823.309814453125, + 759.4841918945312, + 802.1375122070312, + 405.1722412109375, + 320.95391845703125, + 351.9433288574219, + 2017.7557373046875, + 324.00640869140625, + 404.7369079589844, + 403.4670715332031, + 523.7477416992188, + 346.42626953125, + 458.5444641113281, + 475.87103271484375, + 474.52099609375, + 427.3804931640625, + 1603.6929931640625 + ] + ] + ] + ], + "torch_dtype": "float32", + "transformers_version": "4.25.1" +} diff --git a/people-and-planet-ai/weather-forecasting/serving/model/pytorch_model.bin b/people-and-planet-ai/weather-forecasting/serving/model/pytorch_model.bin new file mode 100644 index 
000000000000..55940425d472 Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/serving/model/pytorch_model.bin differ diff --git a/people-and-planet-ai/weather-forecasting/serving/model/training_args.bin b/people-and-planet-ai/weather-forecasting/serving/model/training_args.bin new file mode 100644 index 000000000000..a76f32e1ec10 Binary files /dev/null and b/people-and-planet-ai/weather-forecasting/serving/model/training_args.bin differ diff --git a/people-and-planet-ai/weather-forecasting/serving/requirements.txt b/people-and-planet-ai/weather-forecasting/serving/requirements.txt new file mode 100644 index 000000000000..8d1b02ee4ed4 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/requirements.txt @@ -0,0 +1,7 @@ +Flask==2.2.2 +gunicorn==20.1.0 +torch==1.13.1 + +# Local packages. +./weather-data +./weather-model diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-data/pyproject.toml b/people-and-planet-ai/weather-forecasting/serving/weather-data/pyproject.toml new file mode 100644 index 000000000000..ed4bb423cc40 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-data/pyproject.toml @@ -0,0 +1,21 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# https://peps.python.org/pep-0621 +[project] +name = "weather-data" +version = "1.0.0" +dependencies = [ + "earthengine-api==0.1.336", +] diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/__init__.py b/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/__init__.py new file mode 100644 index 000000000000..7ba50f9339dd --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/data.py b/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/data.py new file mode 100644 index 000000000000..36a75f93160b --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-data/weather/data.py @@ -0,0 +1,231 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data utilities to grab data from Earth Engine. +Meant to be used for both training and prediction so the model is +trained on exactly the same data that will be used for predictions. +""" + +from __future__ import annotations + +from datetime import datetime, timedelta +import io + +import ee +from google.api_core import exceptions, retry +import google.auth +import numpy as np +from numpy.lib.recfunctions import structured_to_unstructured +import requests + +# Constants. +SCALE = 10000 # meters per pixel +INPUT_HOUR_DELTAS = [-4, -2, 0] +OUTPUT_HOUR_DELTAS = [2, 6] +WINDOW = timedelta(days=1) + +# Authenticate and initialize Earth Engine with the default credentials. +credentials, project = google.auth.default( + scopes=[ + "https://www.googleapis.com/auth/cloud-platform", + "https://www.googleapis.com/auth/earthengine", + ] +) + +# Use the Earth Engine High Volume endpoint. +# https://developers.google.com/earth-engine/cloud/highvolume +ee.Initialize( + credentials.with_quota_project(None), + project=project, + opt_url="https://earthengine-highvolume.googleapis.com", +) + + +def get_gpm(date: datetime) -> ee.Image: + """Gets a Global Precipitation Measurement image for the selected date. + + For more information: + https://developers.google.com/earth-engine/datasets/catalog/NASA_GPM_L3_IMERG_V06 + + Args: + date: Date to take a snapshot from. + + Returns: An Earth Engine image. + """ + window_start = (date - WINDOW).isoformat() + window_end = date.isoformat() + return ( + ee.ImageCollection("NASA/GPM_L3/IMERG_V06") + .filterDate(window_start, window_end) + .select("precipitationCal") + .sort("system:time_start", False) + .mosaic() + .unmask(0) + .float() + ) + + +def get_gpm_sequence(dates: list[datetime]) -> ee.Image: + """Gets a Global Precipitation Measurement sequence for the selected dates. + + Args: + dates: List of dates to get images from. + + Returns: An Earth Engine image. + """ + images = [get_gpm(date) for date in dates] + return ee.ImageCollection(images).toBands() + + +def get_goes16(date: datetime) -> ee.Image: + """Gets a GOES 16 image for the selected date. + + For more information: + https://developers.google.com/earth-engine/datasets/catalog/NOAA_GOES_16_MCMIPF + + Args: + date: Date to take a snapshot from. + + Returns: An Earth Engine image. + """ + window_start = (date - WINDOW).isoformat() + window_end = date.isoformat() + return ( + ee.ImageCollection("NOAA/GOES/16/MCMIPF") + .filterDate(window_start, window_end) + .select("CMI_C.*") + .sort("system:time_start", False) + .mosaic() + .unmask(0) + .float() + ) + + +def get_goes16_sequence(dates: list[datetime]) -> ee.Image: + """Gets a GOES 16 sequence for the selected dates. + + Args: + dates: List of dates to get images from. + + Returns: An Earth Engine image. + """ + images = [get_goes16(date) for date in dates] + return ee.ImageCollection(images).toBands() + + +def get_elevation() -> ee.Image: + """Gets a digital elevation map. + + For more information: + https://developers.google.com/earth-engine/datasets/catalog/MERIT_DEM_v1_0_3 + + Returns: An Earth Engine image. 
+ """ + return ee.Image("MERIT/DEM/v1_0_3").rename("elevation").unmask(0).float() + + +def get_inputs_image(date: datetime) -> ee.Image: + """Gets an Earth Engine image with all the inputs for the model. + + Args: + date: Date to take a snapshot from. + + Returns: An Earth Engine image. + """ + dates = [date + timedelta(hours=h) for h in INPUT_HOUR_DELTAS] + precipitation = get_gpm_sequence(dates) + cloud_and_moisture = get_goes16_sequence(dates) + elevation = get_elevation() + return ee.Image([precipitation, cloud_and_moisture, elevation]) + + +def get_labels_image(date: datetime) -> ee.Image: + """Gets an Earth Engine image with the labels to train the model. + + Args: + date: Date to take a snapshot from. + + Returns: An Earth Engine image. + """ + dates = [date + timedelta(hours=h) for h in OUTPUT_HOUR_DELTAS] + return get_gpm_sequence(dates) + + +def get_inputs_patch(date: datetime, point: tuple, patch_size: int) -> np.ndarray: + """Gets the patch of pixels for the inputs. + + Args: + date: The date of interest. + point: A (longitude, latitude) coordinate. + patch_size: Size in pixels of the surrounding square patch. + + Returns: The pixel values of a patch as a NumPy array. + """ + image = get_inputs_image(date) + patch = get_patch(image, point, patch_size, SCALE) + return structured_to_unstructured(patch) + + +def get_labels_patch(date: datetime, point: tuple, patch_size: int) -> np.ndarray: + """Gets the patch of pixels for the labels. + + Args: + date: The date of interest. + point: A (longitude, latitude) coordinate. + patch_size: Size in pixels of the surrounding square patch. + + Returns: The pixel values of a patch as a NumPy array. + """ + image = get_labels_image(date) + patch = get_patch(image, point, patch_size, SCALE) + return structured_to_unstructured(patch) + + +@retry.Retry() +def get_patch(image: ee.Image, point: tuple, patch_size: int, scale: int) -> np.ndarray: + """Fetches a patch of pixels from Earth Engine. + + It retries if we get error "429: Too Many Requests". + + Args: + image: Image to get the patch from. + point: A (longitude, latitude) pair for the point of interest. + patch_size: Size in pixels of the surrounding square patch. + scale: Number of meters per pixel. + + Raises: + requests.exceptions.RequestException + + Returns: + The requested patch of pixels as a structured + NumPy array with shape (width, height). + """ + geometry = ee.Geometry.Point(point) + url = image.getDownloadURL( + { + "region": geometry.buffer(scale * patch_size / 2, 1).bounds(1), + "dimensions": [patch_size, patch_size], + "format": "NPY", + } + ) + + # If we get "429: Too Many Requests" errors, it's safe to retry the request. + # The Retry library only works with `google.api_core` exceptions. + response = requests.get(url) + if response.status_code == 429: + raise exceptions.TooManyRequests(response.text) + + # Still raise any other exceptions to make sure we got valid data. + response.raise_for_status() + return np.load(io.BytesIO(response.content), allow_pickle=True) diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml b/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml new file mode 100644 index 000000000000..4bcf1278d8df --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-model/pyproject.toml @@ -0,0 +1,25 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# https://peps.python.org/pep-0621 +[project] +name = "weather-model" +version = "1.0.0" +dependencies = [ + "datasets==2.8.0", + "transformers==4.25.1", +] + +[project.scripts] +weather-trainer = "weather.trainer:main" diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/__init__.py b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/__init__.py new file mode 100644 index 000000000000..7ba50f9339dd --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/model.py b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/model.py new file mode 100644 index 000000000000..3eb274dbb3b9 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/model.py @@ -0,0 +1,157 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Defines a Fully Convolutional Network to predict precipitation.""" + +from __future__ import annotations + +from typing import Any as AnyType, Optional + +from datasets.arrow_dataset import Dataset +import numpy as np +import torch +from transformers import PretrainedConfig, PreTrainedModel + + +class WeatherConfig(PretrainedConfig): + """A custom Hugging Face config for a WeatherModel. + + This contains all the hyperparameters for the model, including the + mean and standard deviation used for the Normalization layer in the model. 
+ + For more information: + https://huggingface.co/docs/transformers/main/en/custom_models#writing-a-custom-configuration + """ + + model_type = "weather" + + def __init__( + self, + mean: list = [], + std: list = [], + num_inputs: int = 52, + num_hidden1: int = 64, + num_hidden2: int = 128, + num_outputs: int = 2, + kernel_size: tuple[int, int] = (3, 3), + **kwargs: AnyType, + ) -> None: + self.mean = mean + self.std = std + self.num_inputs = num_inputs + self.num_hidden1 = num_hidden1 + self.num_hidden2 = num_hidden2 + self.num_outputs = num_outputs + self.kernel_size = kernel_size + super().__init__(**kwargs) + + +class WeatherModel(PreTrainedModel): + """A custom Hugging Face model. + + For more information: + https://huggingface.co/docs/transformers/main/en/custom_models#writing-a-custom-model + """ + + config_class = WeatherConfig + + def __init__(self, config: WeatherConfig) -> None: + super().__init__(config) + self.layers = torch.nn.Sequential( + Normalization(config.mean, config.std), + MoveDim(-1, 1), # convert to channels-first + torch.nn.Conv2d(config.num_inputs, config.num_hidden1, config.kernel_size), + torch.nn.ReLU(), + torch.nn.ConvTranspose2d( + config.num_hidden1, config.num_hidden2, config.kernel_size + ), + torch.nn.ReLU(), + MoveDim(1, -1), # convert to channels-last + torch.nn.Linear(config.num_hidden2, config.num_outputs), + torch.nn.ReLU(), # precipitation cannot be negative + ) + + def forward( + self, inputs: torch.Tensor, labels: Optional[torch.Tensor] = None + ) -> dict[str, torch.Tensor]: + """Computes predictions as expected by ModelOutputs. + + If `labels` are passed, it computes the loss between the model's + predictions and the actual labels. + + For more information: + https://huggingface.co/docs/transformers/main/en/main_classes/output + + Args: + inputs: Input data. + labels: Ground truth data. + + Returns: + {"loss": loss, "logits": predictions} if `labels` is provided. + {"logits": predictions} otherwise. 
+ """ + predictions = self.layers(inputs) + if labels is None: + return {"logits": predictions} + + loss_fn = torch.nn.SmoothL1Loss() + loss = loss_fn(predictions, labels) + return {"loss": loss, "logits": predictions} + + @staticmethod + def create(inputs: Dataset, **kwargs: AnyType) -> WeatherModel: + """Creates a new WeatherModel calculating the + mean and standard deviation from a dataset.""" + data = np.array(inputs, np.float32) + mean = data.mean(axis=(0, 1, 2))[None, None, None, :] + std = data.std(axis=(0, 1, 2))[None, None, None, :] + config = WeatherConfig(mean.tolist(), std.tolist(), **kwargs) + return WeatherModel(config) + + def predict(self, inputs: AnyType) -> np.ndarray: + """Predicts a single request.""" + return self.predict_batch(torch.as_tensor([inputs]))[0] + + def predict_batch(self, inputs_batch: AnyType) -> np.ndarray: + """Predicts a batch of requests.""" + device = "cuda" if torch.cuda.is_available() else "cpu" + model = self.to(device) + with torch.no_grad(): + outputs = model(torch.as_tensor(inputs_batch, device=device)) + predictions = outputs["logits"] + return predictions.cpu().numpy() + + +class Normalization(torch.nn.Module): + """Preprocessing normalization layer with z-score.""" + + def __init__(self, mean: AnyType, std: AnyType) -> None: + super().__init__() + self.mean = torch.nn.Parameter(torch.as_tensor(mean)) + self.std = torch.nn.Parameter(torch.as_tensor(std)) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return (x - self.mean) / self.std + + +class MoveDim(torch.nn.Module): + """Moves a dimension axis to another position.""" + + def __init__(self, src: int, dest: int) -> None: + super().__init__() + self.src = src + self.dest = dest + + def forward(self, x: torch.Tensor) -> torch.Tensor: + return x.moveaxis(self.src, self.dest) diff --git a/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/trainer.py b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/trainer.py new file mode 100644 index 000000000000..a7be177c48ef --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/serving/weather-model/weather/trainer.py @@ -0,0 +1,181 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Trains a model to predict precipitation.""" + +from __future__ import annotations + +from glob import glob +import os + +from datasets.arrow_dataset import Dataset +from datasets.dataset_dict import DatasetDict +import numpy as np +from transformers import Trainer, TrainingArguments + +from weather.model import WeatherModel + + +# Default values. +EPOCHS = 100 +BATCH_SIZE = 512 +TRAIN_TEST_RATIO = 0.9 + +# Constants. 
+NUM_DATASET_READ_PROC = 16 # number of processes to read data files in parallel +NUM_DATASET_PROC = os.cpu_count() or 8 # number of processes for CPU transformations + + +def read_dataset(data_path: str, train_test_ratio: float) -> DatasetDict: + """Reads data files into a Dataset with train/test splits.""" + + def read_data_file(item: dict[str, str]) -> dict[str, np.ndarray]: + with open(item["filename"], "rb") as f: + npz = np.load(f) + return {"inputs": npz["inputs"], "labels": npz["labels"]} + + def flatten(batch: dict) -> dict: + return {key: np.concatenate(values) for key, values in batch.items()} + + files = glob(os.path.join(data_path, "*.npz")) + dataset = ( + Dataset.from_dict({"filename": files}) + .map( + read_data_file, + num_proc=NUM_DATASET_READ_PROC, + remove_columns=["filename"], + ) + .map(flatten, batched=True, num_proc=NUM_DATASET_PROC) + ) + return dataset.train_test_split(train_size=train_test_ratio, shuffle=True) + + +def augmented(dataset: Dataset) -> Dataset: + """Augments dataset by rotating and flipping the examples.""" + + def augment(values: list) -> np.ndarray: + transformed = [ + np.rot90(values, 0, (1, 2)), + np.rot90(values, 1, (1, 2)), + np.rot90(values, 2, (1, 2)), + np.rot90(values, 3, (1, 2)), + np.flip(np.rot90(values, 0, (1, 2)), axis=1), + np.flip(np.rot90(values, 1, (1, 2)), axis=1), + np.flip(np.rot90(values, 2, (1, 2)), axis=1), + np.flip(np.rot90(values, 3, (1, 2)), axis=1), + ] + return np.concatenate(transformed) + + return dataset.map( + lambda batch: {key: augment(values) for key, values in batch.items()}, + batched=True, + num_proc=NUM_DATASET_PROC, + ) + + +def run( + data_path: str, + model_path: str, + epochs: int = EPOCHS, + batch_size: int = BATCH_SIZE, + train_test_ratio: float = TRAIN_TEST_RATIO, + from_checkpoint: bool = False, +) -> None: + """Trains a new WeatherModel. + + Args: + data_path: Directory path to read data files from. + model_path: Directory path to write the trained model to. + epochs: Number of times to go through the training dataset. + batch_size: Number of training examples to learn from at once. + train_test_ratio: Ratio of examples to use for training and for testing. + from_checkpoint: Whether or not to resume from latest checkpoint. 
+ """ + + print(f"data_path: {data_path}") + print(f"model_path: {model_path}") + print(f"epochs: {epochs}") + print(f"batch_size: {batch_size}") + print(f"train_test_ratio: {train_test_ratio}") + print("-" * 40) + + dataset = read_dataset(data_path, train_test_ratio) + print(dataset) + + model = WeatherModel.create(dataset["train"]["inputs"]) + print(model.config) + print(model) + + training_args = TrainingArguments( + output_dir=os.path.join(model_path, "checkpoints"), + per_device_train_batch_size=batch_size, + per_device_eval_batch_size=batch_size, + num_train_epochs=epochs, + logging_strategy="epoch", + evaluation_strategy="epoch", + ) + trainer = Trainer( + model, + training_args, + train_dataset=augmented(dataset["train"]), + eval_dataset=dataset["test"], + ) + trainer.train(resume_from_checkpoint=from_checkpoint) + trainer.save_model(model_path) + + +def main() -> None: + import argparse + + parser = argparse.ArgumentParser() + parser.add_argument( + "--data-path", + required=True, + help="Directory path to read data files from.", + ) + parser.add_argument( + "--model-path", + required=True, + help="Directory path to write the trained model to.", + ) + parser.add_argument( + "--epochs", + type=int, + default=EPOCHS, + help="Number of times to go through the training dataset.", + ) + parser.add_argument( + "--batch-size", + type=int, + default=BATCH_SIZE, + help="Number of training examples to learn from at once.", + ) + parser.add_argument( + "--train-test-ratio", + type=float, + default=TRAIN_TEST_RATIO, + help="Ratio of examples to use for training and for testing.", + ) + parser.add_argument( + "--from-checkpoint", + action="store_true", + help="Whether or not to resume from latest checkpoint.", + ) + args = parser.parse_args() + + run(**vars(args)) + + +if __name__ == "__main__": + main() diff --git a/people-and-planet-ai/weather-forecasting/tests/dataset_tests/noxfile_config.py b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/noxfile_config.py new file mode 100644 index 000000000000..ee3db414b360 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/noxfile_config.py @@ -0,0 +1,43 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + # 💡 Only test with Python 3.10 + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.11"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. 
You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements-test.txt b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements-test.txt new file mode 100644 index 000000000000..a4a1408f0db4 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements-test.txt @@ -0,0 +1,4 @@ +ipykernel==6.20.1 +nbclient==0.7.2 +pytest-xdist==3.1.0 +pytest==7.2.0 diff --git a/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements.txt b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements.txt new file mode 100644 index 000000000000..f807deb40e7f --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/requirements.txt @@ -0,0 +1,4 @@ +../../serving/weather-data +apache-beam[gcp,interactive]==2.43.0 +build==0.10.0 +plotly==5.12.0 diff --git a/people-and-planet-ai/weather-forecasting/tests/dataset_tests/test_dataset.py b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/test_dataset.py new file mode 100644 index 000000000000..562592882fe2 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/dataset_tests/test_dataset.py @@ -0,0 +1,70 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import textwrap + +# The conftest contains a bunch of reusable fixtures used all over the place. +# If we use a fixture not defined here, it must be on the conftest! +# https://docs.pytest.org/en/latest/explanation/fixtures.html +import conftest # python-docs-samples/people-and-planet-ai/conftest.py + +import pytest + +os.chdir(os.path.join("..", "..")) + + +@pytest.fixture(scope="session") +def test_name() -> str: + # Many fixtures expect a fixture called `test_name`, so be sure to define it! + return "ppai/weather-dataset" + + +def test_dataset( + project: str, bucket_name: str, location: str, unique_name: str +) -> None: + dataflow_dataset_flags = " ".join( + [ + '--runner="DataflowRunner"', + f"--job_name={unique_name}", + "--num-dates=1", + "--num-bins=2", + "--max-requests=2", + ] + ) + + conftest.run_notebook_parallel( + os.path.join("notebooks", "2-dataset.ipynb"), + prelude=textwrap.dedent( + f"""\ + # Google Cloud resources. 
+ project = {repr(project)} + bucket = {repr(bucket_name)} + location = {repr(location)} + """ + ), + sections={ + "# 🗄 Create the dataset locally": { + "data_path": f"gs://{bucket_name}/test/weather/data-local", + }, + "# ☁️ Create the dataset in Dataflow": { + "variables": { + "data_path": f"gs://{bucket_name}/test/weather/data-dataflow", + }, + "replace": {'--runner="DataflowRunner"': dataflow_dataset_flags}, + }, + }, + ) diff --git a/people-and-planet-ai/weather-forecasting/tests/overview_tests/noxfile_config.py b/people-and-planet-ai/weather-forecasting/tests/overview_tests/noxfile_config.py new file mode 100644 index 000000000000..ee3db414b360 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/overview_tests/noxfile_config.py @@ -0,0 +1,43 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + # 💡 Only test with Python 3.10 + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.11"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
+ "envs": {}, +} diff --git a/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements-test.txt b/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements-test.txt new file mode 100644 index 000000000000..a4a1408f0db4 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements-test.txt @@ -0,0 +1,4 @@ +ipykernel==6.20.1 +nbclient==0.7.2 +pytest-xdist==3.1.0 +pytest==7.2.0 diff --git a/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements.txt b/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements.txt new file mode 100644 index 000000000000..39a12f46c9da --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/overview_tests/requirements.txt @@ -0,0 +1,2 @@ +../../serving/weather-data +folium==0.14.0 diff --git a/people-and-planet-ai/weather-forecasting/tests/overview_tests/test_overview.py b/people-and-planet-ai/weather-forecasting/tests/overview_tests/test_overview.py new file mode 100644 index 000000000000..541549c9f3e4 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/overview_tests/test_overview.py @@ -0,0 +1,46 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import os +import textwrap + +# The conftest contains a bunch of reusable fixtures used all over the place. +# If we use a fixture not defined here, it must be on the conftest! +# https://docs.pytest.org/en/latest/explanation/fixtures.html +import conftest # python-docs-samples/people-and-planet-ai/conftest.py + +import pytest + +os.chdir(os.path.join("..", "..")) + + +@pytest.fixture(scope="session") +def test_name() -> str: + # Many fixtures expect a fixture called `test_name`, so be sure to define it! + return "ppai/weather-overview" + + +def test_overview(project: str) -> None: + conftest.run_notebook_parallel( + os.path.join("notebooks", "1-overview.ipynb"), + prelude=textwrap.dedent( + f"""\ + # Google Cloud resources. + project = {repr(project)} + """ + ), + sections={"# 🧭 Overview": {}}, + ) diff --git a/people-and-planet-ai/weather-forecasting/tests/predictions_tests/noxfile_config.py b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/noxfile_config.py new file mode 100644 index 000000000000..ee3db414b360 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/noxfile_config.py @@ -0,0 +1,43 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + # 💡 Only test with Python 3.10 + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.11"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements-test.txt b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements-test.txt new file mode 100644 index 000000000000..a4a1408f0db4 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements-test.txt @@ -0,0 +1,4 @@ +ipykernel==6.20.1 +nbclient==0.7.2 +pytest-xdist==3.1.0 +pytest==7.2.0 diff --git a/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements.txt b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements.txt new file mode 100644 index 000000000000..cdcaa89409cf --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/requirements.txt @@ -0,0 +1,4 @@ +../../serving/weather-data +../../serving/weather-model +plotly==5.12.0 +torch==1.13.1 diff --git a/people-and-planet-ai/weather-forecasting/tests/predictions_tests/test_predictions.py b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/test_predictions.py new file mode 100644 index 000000000000..9e3f63d79497 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/predictions_tests/test_predictions.py @@ -0,0 +1,78 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +from collections.abc import Iterator +import os +import textwrap + +# The conftest contains a bunch of reusable fixtures used all over the place. +# If we use a fixture not defined here, it must be on the conftest! 
+# https://docs.pytest.org/en/latest/explanation/fixtures.html +import conftest # python-docs-samples/people-and-planet-ai/conftest.py + +import pytest + +os.chdir(os.path.join("..", "..")) + + +@pytest.fixture(scope="session") +def test_name() -> str: + # Many fixtures expect a fixture called `test_name`, so be sure to define it! + return "ppai/weather-predictions" + + +@pytest.fixture(scope="session") +def model_path_gcs(bucket_name: str) -> str: + path_gcs = f"gs://{bucket_name}/model" + conftest.run_cmd("gsutil", "cp", "serving/model/*", path_gcs) + return path_gcs + + +@pytest.fixture(scope="session") +def service_name(unique_name: str, location: str) -> Iterator[str]: + # The service itself is created in the notebook. + yield unique_name + conftest.cloud_run_cleanup(unique_name, location) + + +def test_predictions( + project: str, + bucket_name: str, + location: str, + identity_token: str, + service_name: str, + model_path_gcs: str, +) -> None: + conftest.run_notebook_parallel( + os.path.join("notebooks", "4-predictions.ipynb"), + prelude=textwrap.dedent( + f"""\ + # Google Cloud resources. + project = {repr(project)} + bucket = {repr(bucket_name)} + location = {repr(location)} + """ + ), + sections={ + "# 💻 Local predictions": {"variables": {"model_path_gcs": model_path_gcs}}, + "# ☁️ Cloud Run predictions": { + "variables": { + "service_name": service_name, + "identity_token": identity_token, + } + }, + }, + ) diff --git a/people-and-planet-ai/weather-forecasting/tests/training_tests/noxfile_config.py b/people-and-planet-ai/weather-forecasting/tests/training_tests/noxfile_config.py new file mode 100644 index 000000000000..ee3db414b360 --- /dev/null +++ b/people-and-planet-ai/weather-forecasting/tests/training_tests/noxfile_config.py @@ -0,0 +1,43 @@ +# Copyright 2023 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/main/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + # 💡 Only test with Python 3.10 + "ignored_versions": ["2.7", "3.6", "3.7", "3.8", "3.9", "3.11"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, + # A dictionary you want to inject into your test. 
Don't put any
+    # secrets here. These values will override predefined values.
+    "envs": {},
+}
diff --git a/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements-test.txt b/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements-test.txt
new file mode 100644
index 000000000000..84b605ec357d
--- /dev/null
+++ b/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements-test.txt
@@ -0,0 +1,5 @@
+../../serving/weather-data
+ipykernel==6.20.1
+nbclient==0.7.2
+pytest-xdist==3.1.0
+pytest==7.2.0
diff --git a/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements.txt b/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements.txt
new file mode 100644
index 000000000000..f3c6328d69aa
--- /dev/null
+++ b/people-and-planet-ai/weather-forecasting/tests/training_tests/requirements.txt
@@ -0,0 +1,4 @@
+../../serving/weather-model
+build==0.10.0
+google-cloud-aiplatform==1.21.0
+torch==1.13.1
diff --git a/people-and-planet-ai/weather-forecasting/tests/training_tests/test_training.py b/people-and-planet-ai/weather-forecasting/tests/training_tests/test_training.py
new file mode 100644
index 000000000000..198e036d7624
--- /dev/null
+++ b/people-and-planet-ai/weather-forecasting/tests/training_tests/test_training.py
@@ -0,0 +1,88 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+from datetime import datetime
+import os
+import tempfile
+import textwrap
+
+# The conftest contains a bunch of reusable fixtures used all over the place.
+# If we use a fixture not defined here, it must be on the conftest!
+# https://docs.pytest.org/en/latest/explanation/fixtures.html
+import conftest  # python-docs-samples/people-and-planet-ai/conftest.py
+
+import numpy as np
+import pytest
+
+from weather.data import get_inputs_patch, get_labels_patch
+
+os.chdir(os.path.join("..", ".."))
+
+
+@pytest.fixture(scope="session")
+def test_name() -> str:
+    # Many fixtures expect a fixture called `test_name`, so be sure to define it!
+    return "ppai/weather-training"
+
+
+@pytest.fixture(scope="session")
+def data_path_gcs(bucket_name: str) -> str:
+    path_gcs = f"gs://{bucket_name}/test/weather/data-training"
+    date = datetime(2019, 9, 2, 18)
+    point = (-69.55, -39.82)
+    patch_size = 8
+    inputs = get_inputs_patch(date, point, patch_size)
+    labels = get_labels_patch(date, point, patch_size)
+    with tempfile.NamedTemporaryFile() as f:
+        batch_size = 16
+        inputs_batch = [inputs] * batch_size
+        labels_batch = [labels] * batch_size
+        np.savez_compressed(f, inputs=inputs_batch, labels=labels_batch)
+        # Flush Python's write buffer so gsutil copies the complete file.
+        f.flush()
+        conftest.run_cmd("gsutil", "cp", f.name, f"{path_gcs}/example.npz")
+    return path_gcs
+
+
+def test_train_model(
+    project: str,
+    bucket_name: str,
+    location: str,
+    data_path_gcs: str,
+    unique_name: str,
+) -> None:
+    conftest.run_notebook_parallel(
+        os.path.join("notebooks", "3-training.ipynb"),
+        prelude=textwrap.dedent(
+            f"""\
+            # Google Cloud resources.
+            project = {repr(project)}
+            bucket = {repr(bucket_name)}
+            location = {repr(location)}
+            """
+        ),
+        sections={
+            "# 🧠 Train the model locally": {
+                "variables": {"data_path_gcs": data_path_gcs, "epochs": 2}
+            },
+            "# ☁️ Train the model in Vertex AI": {
+                "variables": {
+                    "display_name": unique_name,
+                    "data_path": data_path_gcs.replace("gs://", "/gcs/"),
+                    "model_path": f"/gcs/{bucket_name}/test/weather/model-vertex",
+                    "epochs": 2,
+                }
+            },
+        },
+    )
diff --git a/people-and-planet-ai/weather-forecasting/visualize.py b/people-and-planet-ai/weather-forecasting/visualize.py
new file mode 100644
index 000000000000..31177f4b350a
--- /dev/null
+++ b/people-and-planet-ai/weather-forecasting/visualize.py
@@ -0,0 +1,156 @@
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Utility functions to visualize data.
+
+Color names from https://chir.ag/projects/name-that-color
+"""
+
+from __future__ import annotations
+
+import numpy as np
+import plotly.graph_objects as graph_objects
+from plotly.subplots import make_subplots
+
+
+def render_rgb_images(
+    values: np.ndarray, min: float = 0.0, max: float = 1.0
+) -> np.ndarray:
+    """Renders a numeric NumPy array with shape (width, height, rgb) as an image.
+
+    Args:
+        values: A float array with shape (width, height, rgb).
+        min: Minimum value in the values.
+        max: Maximum value in the values.
+
+    Returns: A uint8 array with shape (width, height, rgb).
+    """
+    scaled_values = (values - min) / (max - min)
+    rgb_values = scaled_values.clip(0, 1) * 255
+    return rgb_values.astype(np.uint8)
+
+
+def render_palette(
+    values: np.ndarray, palette: list[str], min: float = 0.0, max: float = 1.0
+) -> np.ndarray:
+    """Renders a numeric NumPy array with shape (width, height) as an image with a palette.
+
+    Args:
+        values: A float array with shape (width, height).
+        palette: List of hex encoded colors.
+        min: Minimum value in the values.
+        max: Maximum value in the values.
+
+    Returns: A uint8 array with shape (width, height, rgb) with colors from the palette.
+    """
+    # Create a color map from a hex color palette.
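+    # Each RGB channel is linearly interpolated from the palette's color
+    # stops to a 256-entry lookup table, one entry per possible color index.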
+ xs = np.linspace(0, len(palette), 256) + indices = np.arange(len(palette)) + + red = np.interp(xs, indices, [int(c[0:2], 16) for c in palette]) + green = np.interp(xs, indices, [int(c[2:4], 16) for c in palette]) + blue = np.interp(xs, indices, [int(c[4:6], 16) for c in palette]) + color_map = np.array([red, green, blue]).astype(np.uint8).transpose() + + scaled_values = (values - min) / (max - min) + color_indices = (scaled_values.clip(0, 1) * 255).astype(np.uint8) + return np.take(color_map, color_indices, axis=0) + + +def render_goes16(patch: np.ndarray) -> np.ndarray: + red = patch[:, :, 1] # CMI_C02 + green = patch[:, :, 2] # CMI_C03 + blue = patch[:, :, 0] # CMI_C01 + rgb_patch = np.stack([red, green, blue], axis=-1) + return render_rgb_images(rgb_patch, max=3000) + + +def render_gpm(patch: np.ndarray) -> np.ndarray: + palette = [ + "000096", # Navy blue + "0064ff", # Blue ribbon blue + "00b4ff", # Dodger blue + "33db80", # Shamrock green + "9beb4a", # Conifer green + "ffeb00", # Turbo yellow + "ffb300", # Selective yellow + "ff6400", # Blaze orange + "eb1e00", # Scarlet red + "af0000", # Bright red + ] + return render_palette(patch[:, :, 0], palette, max=20) + + +def render_elevation(patch: np.ndarray) -> np.ndarray: + palette = [ + "000000", # Black + "478fcd", # Shakespeare blue + "86c58e", # De York green + "afc35e", # Celery green + "8f7131", # Pesto brown + "b78d4f", # Muddy waters brown + "e2b8a6", # Rose fog pink + "ffffff", # White + ] + return render_palette(patch[:, :, 0], palette, max=3000) + + +def show_inputs(patch: np.ndarray) -> None: + fig = make_subplots(rows=2, cols=4) + fig.add_trace(graph_objects.Image(z=render_gpm(patch[:, :, 0:1])), row=1, col=1) + fig.add_trace(graph_objects.Image(z=render_gpm(patch[:, :, 1:2])), row=1, col=2) + fig.add_trace(graph_objects.Image(z=render_gpm(patch[:, :, 2:3])), row=1, col=3) + fig.add_trace(graph_objects.Image(z=render_goes16(patch[:, :, 3:19])), row=2, col=1) + fig.add_trace( + graph_objects.Image(z=render_goes16(patch[:, :, 19:35])), row=2, col=2 + ) + fig.add_trace( + graph_objects.Image(z=render_goes16(patch[:, :, 35:51])), row=2, col=3 + ) + fig.add_trace( + graph_objects.Image(z=render_elevation(patch[:, :, 51:52])), row=1, col=4 + ) + fig.update_layout(height=500, margin=dict(l=0, r=0, b=0, t=0)) + fig.show() + + +def show_outputs(patch: np.ndarray) -> None: + fig = make_subplots(rows=1, cols=2) + fig.add_trace(graph_objects.Image(z=render_gpm(patch[:, :, 0:1])), row=1, col=1) + fig.add_trace(graph_objects.Image(z=render_gpm(patch[:, :, 1:2])), row=1, col=2) + fig.update_layout(height=300, margin=dict(l=0, r=0, b=0, t=0)) + fig.show() + + +def show_predictions(results: list[tuple]) -> None: + fig = make_subplots(rows=5, cols=len(results), vertical_spacing=0.025) + for i, (inputs, predictions, labels) in enumerate(results, start=1): + fig.add_trace( + graph_objects.Image(z=render_goes16(inputs[:, :, 35:51])), row=1, col=i + ) + fig.add_trace( + graph_objects.Image(z=render_gpm(inputs[:, :, 2:3])), row=2, col=i + ) + fig.add_trace( + graph_objects.Image(z=render_elevation(inputs[:, :, 51:52])), row=3, col=i + ) + fig.add_trace( + graph_objects.Image(z=render_gpm(predictions[:, :, 0:1])), row=4, col=i + ) + fig.add_trace( + graph_objects.Image(z=render_gpm(labels[:, :, 0:1])), row=5, col=i + ) + fig.update_layout( + height=5 * int(1000 / len(results)), + margin=dict(l=0, r=0, b=0, t=0), + ) + fig.show()
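
As a quick smoke test, `visualize.py` can also be exercised by hand. The sketch below is illustrative only: the random patch, its 8x8 size, and the 52-channel layout are assumptions inferred from the channel slices that `show_inputs` takes (real patches come from the `weather.data` package), and it assumes you run it from the `weather-forecasting` directory so `visualize` is importable.

```python
import numpy as np

import visualize  # people-and-planet-ai/weather-forecasting/visualize.py

# Fabricated patch: 8x8 pixels with 52 channels, matching the slices that
# show_inputs takes (3 GPM precipitation, 3x16 GOES-16 bands, 1 elevation).
# The values are arbitrary; real patches come from weather.data.
rng = np.random.default_rng(0)
patch = rng.uniform(0, 3000, size=(8, 8, 52))

# Render one precipitation channel with the GPM palette.
image = visualize.render_gpm(patch[:, :, 0:1])
print(image.shape)  # (8, 8, 3) uint8 RGB

# Display every input channel as a grid of Plotly images.
visualize.show_inputs(patch)
```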