From f2bacd167950580da96ad09049124cbb1afe0ace Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 11:37:40 -0700 Subject: [PATCH 01/87] dataflow: minimal gpu examples --- dataflow/gpu-workers/Dockerfile | 54 ---------------- dataflow/gpu-workers/README.md | 5 -- .../gpu-workers/pytorch-minimal/.dockerignore | 5 ++ .../gpu-workers/pytorch-minimal/.gcloudignore | 5 ++ .../gpu-workers/pytorch-minimal/Dockerfile | 30 +++++++++ .../gpu-workers/pytorch-minimal/README.md | 45 +++++++++++++ .../gpu-workers/pytorch-minimal/build.yaml | 32 ++++++++++ dataflow/gpu-workers/pytorch-minimal/main.py | 61 ++++++++++++++++++ .../pytorch-minimal/requirements.txt | 2 + dataflow/gpu-workers/pytorch-minimal/run.yaml | 51 +++++++++++++++ .../{ => tensorflow-landsat}/.dockerignore | 0 .../{ => tensorflow-landsat}/.gcloudignore | 0 .../gpu-workers/tensorflow-landsat/Dockerfile | 41 ++++++++++++ .../gpu-workers/tensorflow-landsat/README.md | 48 ++++++++++++++ .../gpu-workers/tensorflow-landsat/build.yaml | 32 ++++++++++ .../{ => tensorflow-landsat}/cloudbuild.yaml | 0 .../{ => tensorflow-landsat}/e2e_test.py | 0 .../main.py} | 63 ++++++++++--------- .../noxfile_config.py | 0 .../requirements-test.txt | 0 .../{ => tensorflow-landsat}/requirements.txt | 0 .../gpu-workers/tensorflow-landsat/run.yaml | 53 ++++++++++++++++ .../tensorflow-minimal/.dockerignore | 5 ++ .../tensorflow-minimal/.gcloudignore | 5 ++ .../gpu-workers/tensorflow-minimal/Dockerfile | 41 ++++++++++++ .../gpu-workers/tensorflow-minimal/README.md | 45 +++++++++++++ .../gpu-workers/tensorflow-minimal/build.yaml | 32 ++++++++++ .../gpu-workers/tensorflow-minimal/main.py | 62 ++++++++++++++++++ .../tensorflow-minimal/requirements.txt | 2 + .../gpu-workers/tensorflow-minimal/run.yaml | 50 +++++++++++++++ 30 files changed, 680 insertions(+), 89 deletions(-) delete mode 100644 dataflow/gpu-workers/Dockerfile delete mode 100644 dataflow/gpu-workers/README.md create mode 100644 dataflow/gpu-workers/pytorch-minimal/.dockerignore create mode 100644 dataflow/gpu-workers/pytorch-minimal/.gcloudignore create mode 100644 dataflow/gpu-workers/pytorch-minimal/Dockerfile create mode 100644 dataflow/gpu-workers/pytorch-minimal/README.md create mode 100644 dataflow/gpu-workers/pytorch-minimal/build.yaml create mode 100644 dataflow/gpu-workers/pytorch-minimal/main.py create mode 100644 dataflow/gpu-workers/pytorch-minimal/requirements.txt create mode 100644 dataflow/gpu-workers/pytorch-minimal/run.yaml rename dataflow/gpu-workers/{ => tensorflow-landsat}/.dockerignore (100%) rename dataflow/gpu-workers/{ => tensorflow-landsat}/.gcloudignore (100%) create mode 100644 dataflow/gpu-workers/tensorflow-landsat/Dockerfile create mode 100644 dataflow/gpu-workers/tensorflow-landsat/README.md create mode 100644 dataflow/gpu-workers/tensorflow-landsat/build.yaml rename dataflow/gpu-workers/{ => tensorflow-landsat}/cloudbuild.yaml (100%) rename dataflow/gpu-workers/{ => tensorflow-landsat}/e2e_test.py (100%) rename dataflow/gpu-workers/{landsat_view.py => tensorflow-landsat/main.py} (90%) rename dataflow/gpu-workers/{ => tensorflow-landsat}/noxfile_config.py (100%) rename dataflow/gpu-workers/{ => tensorflow-landsat}/requirements-test.txt (100%) rename dataflow/gpu-workers/{ => tensorflow-landsat}/requirements.txt (100%) create mode 100644 dataflow/gpu-workers/tensorflow-landsat/run.yaml create mode 100644 dataflow/gpu-workers/tensorflow-minimal/.dockerignore create mode 100644 dataflow/gpu-workers/tensorflow-minimal/.gcloudignore create mode 100644 
dataflow/gpu-workers/tensorflow-minimal/Dockerfile create mode 100644 dataflow/gpu-workers/tensorflow-minimal/README.md create mode 100644 dataflow/gpu-workers/tensorflow-minimal/build.yaml create mode 100644 dataflow/gpu-workers/tensorflow-minimal/main.py create mode 100644 dataflow/gpu-workers/tensorflow-minimal/requirements.txt create mode 100644 dataflow/gpu-workers/tensorflow-minimal/run.yaml diff --git a/dataflow/gpu-workers/Dockerfile b/dataflow/gpu-workers/Dockerfile deleted file mode 100644 index 7243acc3142..00000000000 --- a/dataflow/gpu-workers/Dockerfile +++ /dev/null @@ -1,54 +0,0 @@ -# Copyright 2020 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Make sure the CUDA and cuDNN versions are compatible with your TensorFlow version. -# https://www.tensorflow.org/install/source#gpu -# Check the Nvidia container registry catalog to look at the available Nvidia images: -# https://ngc.nvidia.com/catalog/containers/nvidia:cuda -FROM nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 - -# The Python version of the Dockerfile MUST match the Python version you use -# to launch the Dataflow job. -ARG python_version=3.8 - -WORKDIR /root - -# Copy the Apache Beam worker files and the requirements.txt file. -COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam -COPY requirements.txt . - -# Update PATH so we find our new Conda and Python installations. -ENV PATH=/opt/python/bin:/opt/conda/bin:$PATH - -RUN apt-get update \ - && apt-get upgrade -y \ - && apt-get install -y wget \ - && rm -rf /var/lib/apt/lists/* \ - # The nvidia image doesn't come with Python pre-installed. - # We use Miniconda to install the Python version of our choice. - && wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ - && sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \ - && rm Miniconda3-latest-Linux-x86_64.sh \ - # Create a new Python environment and install our requirements. - # We don't need to update $PATH since /usr/local is already in $PATH. - && conda create -y -p /opt/python python=$python_version pip \ - && pip install --no-cache-dir -U pip \ - && pip install --no-cache-dir -r requirements.txt \ - && conda clean -y --all --force-pkgs-dirs \ - # Beam workers looks for pip at /usr/local/bin/pip by default. - # This can be omitted in Beam 2.30.0 and later versions. - && ln -s $(which pip) /usr/local/bin/pip - -# Set the entrypoint to Apache Beam SDK worker launcher. 
-ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-workers/README.md b/dataflow/gpu-workers/README.md deleted file mode 100644 index a71f0da3e95..00000000000 --- a/dataflow/gpu-workers/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Workers with GPUs - -[![Open in Cloud Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataflow/gpu-workers/README.md) - -📝 Tutorial: [Processing Landsat satellite images with GPUs](https://cloud.google.com/dataflow/docs/samples/satellite-images-gpus) diff --git a/dataflow/gpu-workers/pytorch-minimal/.dockerignore b/dataflow/gpu-workers/pytorch-minimal/.dockerignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/.dockerignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-workers/pytorch-minimal/.gcloudignore b/dataflow/gpu-workers/pytorch-minimal/.gcloudignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/.gcloudignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-workers/pytorch-minimal/Dockerfile b/dataflow/gpu-workers/pytorch-minimal/Dockerfile new file mode 100644 index 00000000000..4bee40c9d13 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/Dockerfile @@ -0,0 +1,30 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime + +WORKDIR /pipeline + +# Copy the Apache Beam worker files and the pipeline source files. +COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY requirements.txt . +COPY *.py ./ + +# Install the pipeline requirements and check that there are no conflicts. +RUN pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt \ + && pip check + +# Set the entrypoint to Apache Beam SDK worker launcher. +ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-workers/pytorch-minimal/README.md b/dataflow/gpu-workers/pytorch-minimal/README.md new file mode 100644 index 00000000000..15a81d95fc7 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/README.md @@ -0,0 +1,45 @@ +# PyTorch GPU minimal pipeline + +## Before you begin + +Make sure you have followed the +[Dataflow setup instructions](../../README.md). + +Finally, save your resource names in environment variables. + +```sh +export PROJECT=$(gcloud config get-value project) +``` + +## Building the Docker image + +We use Cloud Build to build the container image for the workers. + +```sh +gcloud builds submit --config build.yaml +``` + +## Running the Dataflow job with GPUs + +We use Cloud Build to run the Dataflow job. 
+We launch the job using the worker image to make sure the job launches +with the same Python version as the workers. + +```sh +export REGION="us-central1" +export WORKER_ZONE="us-central1-f" +export GPU_TYPE="nvidia-tesla-t4" + +gcloud beta builds submit \ + --config run.yaml \ + --substitutions _REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --no-source +``` + +> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job. +> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability). + +## What's next? + +For a more complete example, take a look at +📝 [Processing Landsat satellite images with GPUs](https://cloud.google.com/dataflow/docs/samples/satellite-images-gpus). diff --git a/dataflow/gpu-workers/pytorch-minimal/build.yaml b/dataflow/gpu-workers/pytorch-minimal/build.yaml new file mode 100644 index 00000000000..c72876e2623 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/build.yaml @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# This Cloud Build config file builds and pushes the image for the workers. +# +# To learn more about this file: +# https://cloud.google.com/build/docs/build-config +# ----------------------------------------------------------------------------- + +steps: +- name: gcr.io/cloud-builders/docker + args: + - build + - --tag=gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest + - . + +images: [gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest] + +options: + machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/pytorch-minimal/main.py b/dataflow/gpu-workers/pytorch-minimal/main.py new file mode 100644 index 00000000000..19b5a740fba --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/main.py @@ -0,0 +1,61 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
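+
+# This minimal pipeline does one thing: before applying a trivial transform
+# to the input text, it checks (via torch.cuda) that the Dataflow worker can
+# actually see a GPU, and fails fast if it cannot.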
+ +import argparse +import logging +from typing import Any, List, Optional + +import apache_beam as beam +from apache_beam.options.pipeline_options import PipelineOptions +import torch + + +def check_gpus(element: Any, gpus_optional: bool = False) -> Any: + """Validates that we are detecting GPUs, otherwise raise a RuntimeError.""" + if torch.cuda.is_available(): + logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}") + elif gpus_optional: + logging.warning("No GPUs found, defaulting to CPU.") + else: + raise RuntimeError("No GPUs found.") + return element + + +def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: + beam_options = PipelineOptions(beam_args, save_main_session=True) + + # We currently cannot use the `with` statement to run without waiting. + # https://issues.apache.org/jira/browse/BEAM-12455 + pipeline = beam.Pipeline(options=beam_options) + ( + pipeline + | "Create data" >> beam.Create([input_text]) + | "Check GPU availability" >> beam.Map(check_gpus) + | "My transform" >> beam.Map(logging.info) + ) + pipeline.run() + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--input-text", + default="Hello!", + help="Input text to display.", + ) + args, beam_args = parser.parse_known_args() + + run(args.input_text, beam_args) diff --git a/dataflow/gpu-workers/pytorch-minimal/requirements.txt b/dataflow/gpu-workers/pytorch-minimal/requirements.txt new file mode 100644 index 00000000000..530aa4098e7 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/requirements.txt @@ -0,0 +1,2 @@ +apache-beam[gcp]==2.29.0 +torch==1.8.1 diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml new file mode 100644 index 00000000000..83858d79f36 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -0,0 +1,51 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This Cloud Build config runs a Dataflow job using GPUs. +# We use the same worker image to launch the job. +# That way we guarantee the same Python version for the workers. +# It also already has all the requirements installed. 
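+#
+# A rough equivalent without Cloud Build (a sketch; it assumes the image from
+# build.yaml exists and that the container is given Google Cloud credentials,
+# which Cloud Build otherwise provides automatically):
+#
+#   docker run --rm --entrypoint python \
+#       gcr.io/<PROJECT>/samples/dataflow/pytorch-gpu:latest \
+#       /pipeline/main.py --runner=DataflowRunner --project=<PROJECT> \
+#       <remaining flags as in the step below>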
+ +# ----------------------------------------------------------------------------- +# To learn more about this file: +# https://cloud.google.com/build/docs/build-config +# +# To learn more about Cloud Build variable substitutions: +# https://cloud.google.com/build/docs/configuring-builds/substitute-variable-values#using_user-defined_substitutions +# ----------------------------------------------------------------------------- + +substitutions: + _REGION: us-central1 + _WORKER_ZONE: us-central1-f + _GPU_TYPE: nvidia-tesla-t4 + _GPU_COUNT: '1' + +steps: +- name: gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest + entrypoint: python + args: + - /pipeline/main.py + - --runner=DataflowRunner + - --project=$PROJECT_ID + - --region=$_REGION + - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest + - --worker_zone=$_WORKER_ZONE + - --disk_size_gb=100 + - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver + - --experiment=use_runner_v2 + +options: + logging: CLOUD_LOGGING_ONLY + +serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com diff --git a/dataflow/gpu-workers/.dockerignore b/dataflow/gpu-workers/tensorflow-landsat/.dockerignore similarity index 100% rename from dataflow/gpu-workers/.dockerignore rename to dataflow/gpu-workers/tensorflow-landsat/.dockerignore diff --git a/dataflow/gpu-workers/.gcloudignore b/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore similarity index 100% rename from dataflow/gpu-workers/.gcloudignore rename to dataflow/gpu-workers/tensorflow-landsat/.gcloudignore diff --git a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile new file mode 100644 index 00000000000..a8686076460 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile @@ -0,0 +1,41 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Each version of TensorFlow requires a specific CUDA/cuDNN version: +# https://www.tensorflow.org/install/source#gpu +# For a list of all the nvidia images: +# https://ngc.nvidia.com/catalog/containers/nvidia:cuda/tags +FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 + +WORKDIR /pipeline + +# Copy the Apache Beam worker files and the pipeline source files. +COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY requirements.txt . +COPY *.py ./ + +# If you need a different Python version, consider: +# https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa +RUN apt-get update \ + && apt-get install -y curl python3.8 python3-distutils \ + && rm -rf /var/lib/apt/lists/* \ + && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ + && curl https://bootstrap.pypa.io/get-pip.py | python \ + # Install the pipeline requirements and check that there are no conflicts. 
+ && pip install --no-cache-dir --upgrade pip \ + && pip install --no-cache-dir -r requirements.txt \ + && pip check + +# Set the entrypoint to Apache Beam SDK worker launcher. +ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md new file mode 100644 index 00000000000..4d87ed8f622 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -0,0 +1,48 @@ +# Workers with GPUs + +[![Open in Cloud Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataflow/gpu-workers/README.md) + +📝 Tutorial: [Processing Landsat satellite images with GPUs](https://cloud.google.com/dataflow/docs/samples/satellite-images-gpus) + +## Before you begin + +Make sure you have followed the +[Dataflow setup instructions](../../README.md), and additionally: + +* Use or [create a Cloud Storage bucket](https://console.cloud.google.com/storage/create-bucket). + +Finally, save your resource names in environment variables. + +```sh +export PROJECT=$(gcloud config get-value project) +export BUCKET="my-bucket-name" +``` + +## Building the Docker image + +We use Cloud Build to build the container image for the workers. + +```sh +gcloud builds submit --config build.yaml +``` + +## Running the Dataflow job with GPUs + +We use Cloud Build to run the Dataflow job. +We launch the job using the worker image to make sure the job launches +with the same Python version as the workers. + +```sh +export OUTPUT_PATH="gs://$BUCKET/samples/dataflow/landsat/" +export REGION="us-central1" +export WORKER_ZONE="us-central1-f" +export GPU_TYPE="nvidia-tesla-t4" + +gcloud beta builds submit \ + --config run.yaml \ + --substitutions _OUTPUT_PATH=$OUTPUT_PATH,_REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --no-source +``` + +> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job. +> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability). diff --git a/dataflow/gpu-workers/tensorflow-landsat/build.yaml b/dataflow/gpu-workers/tensorflow-landsat/build.yaml new file mode 100644 index 00000000000..b2b81b8f92d --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-landsat/build.yaml @@ -0,0 +1,32 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# ----------------------------------------------------------------------------- +# This Cloud Build config file builds and pushes the image for the workers. +# +# To learn more about this file: +# https://cloud.google.com/build/docs/build-config +# ----------------------------------------------------------------------------- + +steps: +- name: gcr.io/cloud-builders/docker + args: + - build + - --tag=gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest + - . 
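+
+# The `images` field below tells Cloud Build to push the built image to
+# Container Registry. A rough local equivalent (a sketch, assuming Docker is
+# installed and `gcloud auth configure-docker` has been run):
+#   docker build --tag=gcr.io/<PROJECT_ID>/samples/dataflow/landsat-gpu:latest .
+#   docker push gcr.io/<PROJECT_ID>/samples/dataflow/landsat-gpu:latest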
+ +images: [gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest] + +options: + machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/cloudbuild.yaml b/dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml similarity index 100% rename from dataflow/gpu-workers/cloudbuild.yaml rename to dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml diff --git a/dataflow/gpu-workers/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py similarity index 100% rename from dataflow/gpu-workers/e2e_test.py rename to dataflow/gpu-workers/tensorflow-landsat/e2e_test.py diff --git a/dataflow/gpu-workers/landsat_view.py b/dataflow/gpu-workers/tensorflow-landsat/main.py similarity index 90% rename from dataflow/gpu-workers/landsat_view.py rename to dataflow/gpu-workers/tensorflow-landsat/main.py index 9e61016eabf..00216422633 100644 --- a/dataflow/gpu-workers/landsat_view.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -276,39 +276,42 @@ def run( max_value = vis_params["max"] gamma = vis_params["gamma"] - options = PipelineOptions(beam_args, save_main_session=True) - with beam.Pipeline(options=options) as pipeline: - # Optionally, validate that the workers are using GPUs. - gpu_check = ( - pipeline - | beam.Create([None]) - | "Check GPU availability" >> beam.Map(check_gpus, gpus_optional) + beam_options = PipelineOptions(beam_args, save_main_session=True) + + # We currently cannot use the `with` statement to run without waiting. + # https://issues.apache.org/jira/browse/BEAM-12455 + pipeline = beam.Pipeline(options=beam_options) + + # Convert Landsat 8 scenes into images. + # ℹ️ We pass `gpu_check` as an unused side input to force that step in + # the pipeline to wait for the check before continuing. + ( + pipeline + | "Create scene IDs" >> beam.Create(scenes) + | "Get RGB band paths" + >> beam.Map( + get_band_paths, + rgb_band_names, + unused_side_input=beam.pvalue.AsSingleton( + pipeline + | beam.Create([None]) + | "Check GPUs" >> beam.Map(check_gpus, gpus_optional) + ), ) - - # Convert Landsat 8 scenes into images. - # ℹ️ We pass `gpu_check` as an unused side input to force that step in - # the pipeline to wait for the check before continuing. 
- ( - pipeline - | "Create scene IDs" >> beam.Create(scenes) - | "Get RGB band paths" - >> beam.Map( - get_band_paths, - rgb_band_names, - unused_side_input=beam.pvalue.AsSingleton(gpu_check), - ) - | "Load RGB band values" >> beam.MapTuple(load_values) - | "Preprocess pixels" - >> beam.MapTuple(preprocess_pixels, min_value, max_value, gamma) - | "Convert to image" - >> beam.MapTuple( - lambda scene, rgb_pixels: ( - scene, - Image.fromarray(rgb_pixels.numpy(), mode="RGB"), - ) + | "Load RGB band values" >> beam.MapTuple(load_values) + | "Preprocess pixels" + >> beam.MapTuple(preprocess_pixels, min_value, max_value, gamma) + | "Convert to image" + >> beam.MapTuple( + lambda scene, rgb_pixels: ( + scene, + Image.fromarray(rgb_pixels.numpy(), mode="RGB"), ) - | "Save to Cloud Storage" >> beam.MapTuple(save_to_gcs, output_path_prefix) ) + | "Save to Cloud Storage" >> beam.MapTuple(save_to_gcs, output_path_prefix) + ) + + pipeline.run() if __name__ == "__main__": diff --git a/dataflow/gpu-workers/noxfile_config.py b/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py similarity index 100% rename from dataflow/gpu-workers/noxfile_config.py rename to dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py diff --git a/dataflow/gpu-workers/requirements-test.txt b/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt similarity index 100% rename from dataflow/gpu-workers/requirements-test.txt rename to dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt diff --git a/dataflow/gpu-workers/requirements.txt b/dataflow/gpu-workers/tensorflow-landsat/requirements.txt similarity index 100% rename from dataflow/gpu-workers/requirements.txt rename to dataflow/gpu-workers/tensorflow-landsat/requirements.txt diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml new file mode 100644 index 00000000000..f447bfcbbc6 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -0,0 +1,53 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This Cloud Build config runs a Dataflow job using GPUs. +# We use the same worker image to launch the job. +# That way we guarantee the same Python version for the workers. +# It also already has all the requirements installed. 
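+#
+# Note: the step below requests `custom-1-13312-ext` workers. That is a
+# Compute Engine custom machine type (format: custom-<vCPUs>-<memoryMB>,
+# where the `-ext` suffix allows extended memory beyond the standard
+# per-vCPU ratio), here 1 vCPU with 13312 MB of memory for TensorFlow.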
+ +# ----------------------------------------------------------------------------- +# To learn more about this file: +# https://cloud.google.com/build/docs/build-config +# +# To learn more about Cloud Build variable substitutions: +# https://cloud.google.com/build/docs/configuring-builds/substitute-variable-values#using_user-defined_substitutions +# ----------------------------------------------------------------------------- + +substitutions: + _OUTPUT_PATH: please set --substitutions _OUTPUT_PATH=gs://my-bucket/output/path + _REGION: us-central1 + _WORKER_ZONE: us-central1-f + _GPU_TYPE: nvidia-tesla-t4 + _GPU_COUNT: '1' + +steps: +- name: gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest + entrypoint: python + args: + - /pipeline/main.py + - --output-path-prefix=$_OUTPUT_PATH + - --runner=DataflowRunner + - --project=$PROJECT_ID + - --region=$_REGION + - --worker_machine_type=custom-1-13312-ext + - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest + - --worker_zone=$_WORKER_ZONE + - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver + - --experiment=use_runner_v2 + +options: + logging: CLOUD_LOGGING_ONLY + +serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com diff --git a/dataflow/gpu-workers/tensorflow-minimal/.dockerignore b/dataflow/gpu-workers/tensorflow-minimal/.dockerignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/.dockerignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-workers/tensorflow-minimal/.gcloudignore b/dataflow/gpu-workers/tensorflow-minimal/.gcloudignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/.gcloudignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile new file mode 100644 index 00000000000..e892d4c28a8 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile @@ -0,0 +1,41 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Each version of TensorFlow requires a specific CUDA/cuDNN version: +# https://www.tensorflow.org/install/source#gpu +# For a list of all the nvidia images: +# https://ngc.nvidia.com/catalog/containers/nvidia:cuda/tags +FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 + +WORKDIR /pipeline + +# Copy the Apache Beam worker files and the pipeline source files. +COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY requirements.txt . 
+COPY *.py ./
+
+# If you need a different Python version, consider:
+#   https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa
+RUN apt-get update \
+    && apt-get install -y curl python3.8 python3-distutils \
+    && rm -rf /var/lib/apt/lists/* \
+    && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \
+    && curl https://bootstrap.pypa.io/get-pip.py | python \
+    # Install the pipeline requirements and check that there are no conflicts.
+    && pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir -r requirements.txt \
+    && pip check
+
+# Set the entrypoint to Apache Beam SDK worker launcher.
+ENTRYPOINT [ "/opt/apache/beam/boot" ]
diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-workers/tensorflow-minimal/README.md
new file mode 100644
index 00000000000..15a81d95fc7
--- /dev/null
+++ b/dataflow/gpu-workers/tensorflow-minimal/README.md
@@ -0,0 +1,45 @@
+# TensorFlow GPU minimal pipeline
+
+## Before you begin
+
+Make sure you have followed the
+[Dataflow setup instructions](../../README.md).
+
+Finally, save your resource names in environment variables.
+
+```sh
+export PROJECT=$(gcloud config get-value project)
+```
+
+## Building the Docker image
+
+We use Cloud Build to build the container image for the workers.
+
+```sh
+gcloud builds submit --config build.yaml
+```
+
+## Running the Dataflow job with GPUs
+
+We use Cloud Build to run the Dataflow job.
+We launch the job using the worker image to make sure the job launches
+with the same Python version as the workers.
+
+```sh
+export REGION="us-central1"
+export WORKER_ZONE="us-central1-f"
+export GPU_TYPE="nvidia-tesla-t4"
+
+gcloud beta builds submit \
+    --config run.yaml \
+    --substitutions _REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \
+    --no-source
+```
+
+> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job.
+> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability).
+
+## What's next?
+
+For a more complete example, take a look at
+📝 [Processing Landsat satellite images with GPUs](https://cloud.google.com/dataflow/docs/samples/satellite-images-gpus).
diff --git a/dataflow/gpu-workers/tensorflow-minimal/build.yaml b/dataflow/gpu-workers/tensorflow-minimal/build.yaml
new file mode 100644
index 00000000000..84f60f90255
--- /dev/null
+++ b/dataflow/gpu-workers/tensorflow-minimal/build.yaml
@@ -0,0 +1,32 @@
+# Copyright 2021 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# -----------------------------------------------------------------------------
+# This Cloud Build config file builds and pushes the image for the workers.
+#
+# To learn more about this file:
+#   https://cloud.google.com/build/docs/build-config
+# -----------------------------------------------------------------------------
+
+steps:
+- name: gcr.io/cloud-builders/docker
+  args:
+  - build
+  - --tag=gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest
+  - .
+ +images: [gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest] + +options: + machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/tensorflow-minimal/main.py b/dataflow/gpu-workers/tensorflow-minimal/main.py new file mode 100644 index 00000000000..924f4e0935d --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/main.py @@ -0,0 +1,62 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import argparse +import logging +from typing import Any, List, Optional + +import apache_beam as beam +from apache_beam.options.pipeline_options import PipelineOptions +import tensorflow as tf + + +def check_gpus(element: Any, gpus_optional: bool = False) -> Any: + """Validates that we are detecting GPUs, otherwise raise a RuntimeError.""" + gpu_devices = tf.config.list_physical_devices("GPU") + if gpu_devices: + logging.info(f"Using GPU: {gpu_devices}") + elif gpus_optional: + logging.warning("No GPUs found, defaulting to CPU.") + else: + raise RuntimeError("No GPUs found.") + return element + + +def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: + beam_options = PipelineOptions(beam_args, save_main_session=True) + + # We currently cannot use the `with` statement to run without waiting. + # https://issues.apache.org/jira/browse/BEAM-12455 + pipeline = beam.Pipeline(options=beam_options) + ( + pipeline + | "Create data" >> beam.Create([input_text]) + | "Check GPU availability" >> beam.Map(check_gpus) + | "My transform" >> beam.Map(logging.info) + ) + pipeline.run() + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--input-text", + default="Hello!", + help="Input text to display.", + ) + args, beam_args = parser.parse_known_args() + + run(args.input_text, beam_args) diff --git a/dataflow/gpu-workers/tensorflow-minimal/requirements.txt b/dataflow/gpu-workers/tensorflow-minimal/requirements.txt new file mode 100644 index 00000000000..f2f6e11354a --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/requirements.txt @@ -0,0 +1,2 @@ +apache-beam[gcp]==2.29.0 +tensorflow==2.5.0 diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml new file mode 100644 index 00000000000..036db374e0d --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -0,0 +1,50 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# This Cloud Build config runs a Dataflow job using GPUs. +# We use the same worker image to launch the job. +# That way we guarantee the same Python version for the workers. +# It also already has all the requirements installed. + +# ----------------------------------------------------------------------------- +# To learn more about this file: +# https://cloud.google.com/build/docs/build-config +# +# To learn more about Cloud Build variable substitutions: +# https://cloud.google.com/build/docs/configuring-builds/substitute-variable-values#using_user-defined_substitutions +# ----------------------------------------------------------------------------- + +substitutions: + _REGION: us-central1 + _WORKER_ZONE: us-central1-f + _GPU_TYPE: nvidia-tesla-t4 + _GPU_COUNT: '1' + +steps: +- name: gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest + entrypoint: python + args: + - /pipeline/main.py + - --runner=DataflowRunner + - --project=$PROJECT_ID + - --region=$_REGION + - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest + - --worker_zone=$_WORKER_ZONE + - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver + - --experiment=use_runner_v2 + +options: + logging: CLOUD_LOGGING_ONLY + +serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com From d8bba19ed35afa73678e9693165111137843c144 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 14:56:46 -0700 Subject: [PATCH 02/87] added tests --- dataflow/conftest.py | 80 +++++++++------ .../flex-templates/streaming_beam/e2e_test.py | 10 +- .../gpu-workers/pytorch-minimal/e2e_test.py | 44 +++++++++ dataflow/gpu-workers/pytorch-minimal/run.yaml | 4 +- .../gpu-workers/tensorflow-landsat/README.md | 4 +- .../tensorflow-landsat/e2e_test.py | 99 +++---------------- .../gpu-workers/tensorflow-landsat/run.yaml | 5 +- .../gpu-workers/tensorflow-minimal/run.yaml | 2 + 8 files changed, 125 insertions(+), 123 deletions(-) create mode 100644 dataflow/gpu-workers/pytorch-minimal/e2e_test.py diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 13314bf86dd..f8191effd89 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -158,40 +158,64 @@ def _infinite_publish_job() -> None: p.terminate() @staticmethod - def container_image( - image_path: str, + def cloud_build_submit( + image_name: Optional[str] = None, + config: Optional[str] = None, + substitutions: Optional[Dict[str, str]] = None, project: str = PROJECT, - tag: str = "latest", - ) -> str: - image_name = f"gcr.io/{project}/{image_path}-{UUID}:{tag}" + ) -> None: + """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" cmd = ["gcloud", "auth", "configure-docker"] print(cmd) - subprocess.run(cmd, check=True) - cmd = [ - "gcloud", - "builds", - "submit", - f"--project={project}", - f"--tag={image_name}", - ".", - ] - print(cmd) - subprocess.run(cmd, check=True) - print(f"container_image: {image_name}") - yield image_name + if substitutions: + cmd_substitutions = [ + f"--substitutions={','.join([k + '=' + v for k, v in substitutions.items()])}" + ] + else: + cmd_substitutions = [] - cmd = [ - "gcloud", - "container", - "images", - "delete", - image_name, - f"--project={project}", - "--quiet", - ] - print(cmd) subprocess.run(cmd, check=True) + if config: + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--config={config}", + *cmd_substitutions, + ] + print(cmd) + subprocess.run(cmd, 
check=True) + yield config + elif image_name: + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--tag=gcr.io/{project}/{image_name}:latest", + *cmd_substitutions, + ".", + ] + print(cmd) + subprocess.run(cmd, check=True) + yield f"gcr.io/{project}/{image_name}:latest" + else: + raise ValueError("must specify either `config` or `image_name`") + + if image_name: + cmd = [ + "gcloud", + "container", + "images", + "delete", + f"gcr.io/{project}/{image_name}:latest", + f"--project={project}", + "--quiet", + ] + print(cmd) + subprocess.run(cmd, check=True) @staticmethod def dataflow_job_id_from_job_name( diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index e642306ed4b..08f0676b446 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -13,14 +13,12 @@ import json import time -# `conftest` cannot be imported when running in `nox`, but we still -# try to import it for the autocomplete when writing the tests. try: + # `conftest` cannot be imported when running in `nox`, but we still + # try to import it for the autocomplete when writing the tests. from conftest import Utils except ModuleNotFoundError: - from typing import Any - - Utils = Any + Utils = None import pytest NAME = "dataflow-flex-templates-streaming-beam" @@ -61,7 +59,7 @@ def pubsub_publisher(utils: Utils, pubsub_topic: str) -> bool: @pytest.fixture(scope="session") def flex_template_image(utils: Utils) -> str: - yield from utils.container_image(NAME) + yield from utils.cloud_build_submit(NAME) @pytest.fixture(scope="session") diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py new file mode 100644 index 00000000000..d0a25e927d9 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + # `conftest` cannot be imported when running in `nox`, but we still + # try to import it for the autocomplete when writing the tests. + from conftest import Utils +except ModuleNotFoundError: + Utils = None +from google.cloud import storage +import pytest + +NAME = "dataflow-gpu-pytorch" + + +@pytest.fixture(scope="session") +def bucket_name(utils: Utils) -> str: + yield from utils.storage_bucket(NAME) + + +@pytest.fixture(scope="session") +def worker_image(utils: Utils) -> str: + yield from utils.cloud_build_submit(NAME, config="build.yaml") + + +def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: + # Run the Beam pipeline in Dataflow making sure GPUs are used. 
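+    # The job is submitted through run.yaml; note that main.py calls
+    # pipeline.run() without waiting for the result, so this build step
+    # verifies that the job launches, while the `check_gpus` step (which
+    # raises a RuntimeError if no GPU is visible) runs on the workers.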
+ utils.cloud_build_submit( + config="run.yaml", + substitutions={"_TEMP_LOCATION": f"gs://{bucket_name}/temp"}, + ) diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index 83858d79f36..c5e74f4d77a 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -26,6 +26,7 @@ # ----------------------------------------------------------------------------- substitutions: + _TEMP_LOCATION: '' _REGION: us-central1 _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 @@ -39,9 +40,10 @@ steps: - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --temp_location=$_TEMP_LOCATION - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest - --worker_zone=$_WORKER_ZONE - - --disk_size_gb=100 + - --disk_size_gb=50 - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index 4d87ed8f622..eeb91e7a5e6 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -33,14 +33,14 @@ We launch the job using the worker image to make sure the job launches with the same Python version as the workers. ```sh -export OUTPUT_PATH="gs://$BUCKET/samples/dataflow/landsat/" +export GCS_PATH="gs://$BUCKET/samples/dataflow/landsat" export REGION="us-central1" export WORKER_ZONE="us-central1-f" export GPU_TYPE="nvidia-tesla-t4" gcloud beta builds submit \ --config run.yaml \ - --substitutions _OUTPUT_PATH=$OUTPUT_PATH,_REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --substitutions _GCS_PATH=$GCS_PATH,_REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ --no-source ``` diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index f3f105d7b6a..ae49e73f2fb 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -14,102 +14,33 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import platform -import subprocess -import uuid - +try: + # `conftest` cannot be imported when running in `nox`, but we still + # try to import it for the autocomplete when writing the tests. + from conftest import Utils +except ModuleNotFoundError: + Utils = None from google.cloud import storage import pytest -SUFFIX = uuid.uuid4().hex[0:6] -PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] -BUCKET_NAME = f"dataflow-gpu-test-{SUFFIX}" -IMAGE_NAME = f"dataflow/gpu-workers/test-{SUFFIX}:latest" -REGION = "us-central1" -ZONE = "us-central1-f" - - -@pytest.fixture(scope="session") -def bucket_name() -> str: - storage_client = storage.Client() - bucket = storage_client.create_bucket(BUCKET_NAME) - - yield BUCKET_NAME - - bucket.delete(force=True) +NAME = "dataflow-gpu-landsat" @pytest.fixture(scope="session") -def configure_docker() -> None: - subprocess.run( - [ - "gcloud", - "auth", - "configure-docker", - ] - ) +def bucket_name(utils: Utils) -> str: + yield from utils.storage_bucket(NAME) @pytest.fixture(scope="session") -def image_name(configure_docker: None) -> str: - # See the `cloudbuild.yaml` for the configuration for this build. 
- substitutions = { - "_PYTHON_VERSION": platform.python_version(), - "_IMAGE": IMAGE_NAME, - } - print(f"-- Cloud build substitutions: {substitutions}") - subprocess.run( - [ - "gcloud", - "builds", - "submit", - f"--project={PROJECT}", - f"--substitutions={','.join([k + '=' + v for k, v in substitutions.items()])}", - "--timeout=30m", - "--quiet", - ], - check=True, - ) - - yield f"gcr.io/{PROJECT}/{IMAGE_NAME}" - - # Delete the image when we're done. - subprocess.run( - [ - "gcloud", - "container", - "images", - "delete", - f"gcr.io/{PROJECT}/{IMAGE_NAME}", - f"--project={PROJECT}", - "--quiet", - ], - check=True, - ) +def worker_image(utils: Utils) -> str: + yield from utils.cloud_build_submit(NAME, config="build.yaml") -def test_end_to_end(bucket_name: str, image_name: str) -> None: +def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. - gpu_type = "nvidia-tesla-t4" - subprocess.run( - [ - "python", - "landsat_view.py", - f"--output-path-prefix=gs://{bucket_name}/outputs/", - "--runner=DataflowRunner", - f"--job_name=gpu-workers-{SUFFIX}", - f"--project={PROJECT}", - f"--region={REGION}", - f"--temp_location=gs://{bucket_name}/temp", - "--worker_machine_type=custom-1-13312-ext", - "--disk_size_gb=300", - f"--worker_harness_container_image={image_name}", - f"--worker_zone={ZONE}", - f"--experiments=worker_accelerator=type={gpu_type},count=1,install-nvidia-driver", - "--experiments=use_runner_v2", - ], - check=True, + utils.cloud_build_submit( + config="run.yaml", + substitutions={"_GCS_PATH": f"gs://{bucket_name}"}, ) # Check that output files were created and are not empty. diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index f447bfcbbc6..8b8a6b655cc 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -26,7 +26,7 @@ # ----------------------------------------------------------------------------- substitutions: - _OUTPUT_PATH: please set --substitutions _OUTPUT_PATH=gs://my-bucket/output/path + _GCS_PATH: please set --substitutions _GCS_PATH=gs://my-bucket/samples/dataflow/landsat _REGION: us-central1 _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 @@ -37,10 +37,11 @@ steps: entrypoint: python args: - /pipeline/main.py - - --output-path-prefix=$_OUTPUT_PATH + - --output-path-prefix=$_GCS_PATH/outputs/ - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --temp_location=$_GCS_PATH/temp - --worker_machine_type=custom-1-13312-ext - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest - --worker_zone=$_WORKER_ZONE diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index 036db374e0d..769ca1eae3f 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -26,6 +26,7 @@ # ----------------------------------------------------------------------------- substitutions: + _TEMP_LOCATION: '' _REGION: us-central1 _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 @@ -39,6 +40,7 @@ steps: - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --temp_location=$_TEMP_LOCATION - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest - --worker_zone=$_WORKER_ZONE - 
--experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver From ed2d91d9675e66bfa302d0b4917264dbae9a3f13 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 14:59:12 -0700 Subject: [PATCH 03/87] update dockerignore and cloudignore --- dataflow/gpu-workers/tensorflow-landsat/.dockerignore | 11 +++++------ dataflow/gpu-workers/tensorflow-landsat/.gcloudignore | 11 +++++------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/.dockerignore b/dataflow/gpu-workers/tensorflow-landsat/.dockerignore index 04f5ec66ca6..775d845fa58 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/.dockerignore +++ b/dataflow/gpu-workers/tensorflow-landsat/.dockerignore @@ -1,6 +1,5 @@ -# Ignore files for docker. -.mypy_cache/ -.nox/ -__pycache__/ -env/ -outputs/ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore b/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore index cda483971fd..775d845fa58 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore +++ b/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore @@ -1,6 +1,5 @@ -# Ignore files for gcloud like Cloud Build. -.mypy_cache/ -.nox/ -__pycache__/ -env/ -outputs/ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py From 7969369e9efe3487a657bccc6cf047639989ccf3 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 14:59:53 -0700 Subject: [PATCH 04/87] remove old config --- .../tensorflow-landsat/cloudbuild.yaml | 35 ------------------- 1 file changed, 35 deletions(-) delete mode 100644 dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml diff --git a/dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml b/dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml deleted file mode 100644 index dec3d7aabb8..00000000000 --- a/dataflow/gpu-workers/tensorflow-landsat/cloudbuild.yaml +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# To build the container image: -# PYTHON_VERSION=`python -c 'import platform; print(platform.python_version())'` -# gcloud builds submit --substitutions _PYTHON_VERSION=$PYTHON_VERSION . --timeout 20m - -steps: - # Build the container image with the Python version of our choice. - - name: gcr.io/cloud-builders/docker - args: - [ 'build' - , '--build-arg=python_version=$_PYTHON_VERSION' - , '--tag=gcr.io/$PROJECT_ID/$_IMAGE' - , '.' - ] - - # Push the image to Container Registry. 
- - name: gcr.io/cloud-builders/docker - args: [ 'push', 'gcr.io/$PROJECT_ID/$_IMAGE' ] - -substitutions: - _PYTHON_VERSION: '3.8' - _IMAGE: samples/dataflow/tensorflow-gpu:latest From d34a2cac36d87590f67f8ed335867498516616e0 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 15:02:55 -0700 Subject: [PATCH 05/87] update header year --- dataflow/gpu-workers/tensorflow-landsat/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index ae49e73f2fb..d8e93467fb4 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 193ce0e0b726014494aef70ae4dfd27ec3731679 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 15:09:28 -0700 Subject: [PATCH 06/87] removed unused import --- dataflow/gpu-workers/pytorch-minimal/e2e_test.py | 1 - 1 file changed, 1 deletion(-) diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index d0a25e927d9..b1d612593f6 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -20,7 +20,6 @@ from conftest import Utils except ModuleNotFoundError: Utils = None -from google.cloud import storage import pytest NAME = "dataflow-gpu-pytorch" From abcbaaed0f9b822e6dea8e81dc6fa3cbd918ff7c Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 15:09:37 -0700 Subject: [PATCH 07/87] add test --- .../tensorflow-minimal/e2e_test.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 dataflow/gpu-workers/tensorflow-minimal/e2e_test.py diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py new file mode 100644 index 00000000000..de5b46738ae --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -0,0 +1,43 @@ +#!/usr/bin/env python + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +try: + # `conftest` cannot be imported when running in `nox`, but we still + # try to import it for the autocomplete when writing the tests. + from conftest import Utils +except ModuleNotFoundError: + Utils = None +import pytest + +NAME = "dataflow-gpu-tensorflow" + + +@pytest.fixture(scope="session") +def bucket_name(utils: Utils) -> str: + yield from utils.storage_bucket(NAME) + + +@pytest.fixture(scope="session") +def worker_image(utils: Utils) -> str: + yield from utils.cloud_build_submit(NAME, config="build.yaml") + + +def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: + # Run the Beam pipeline in Dataflow making sure GPUs are used. 
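+    # `_TEMP_LOCATION` feeds the --temp_location option declared in
+    # run.yaml; using the per-test bucket from the `bucket_name` fixture
+    # keeps runs isolated.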
+ utils.cloud_build_submit( + config="run.yaml", + substitutions={"_TEMP_LOCATION": f"gs://{bucket_name}/temp"}, + ) From f6af3384e2b343e249a6506b9341c9ececc5bb56 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 16:42:34 -0700 Subject: [PATCH 08/87] make image name unique --- dataflow/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index f8191effd89..97675101450 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -200,7 +200,7 @@ def cloud_build_submit( ] print(cmd) subprocess.run(cmd, check=True) - yield f"gcr.io/{project}/{image_name}:latest" + yield f"gcr.io/{project}/{image_name}-{UUID}:latest" else: raise ValueError("must specify either `config` or `image_name`") @@ -210,7 +210,7 @@ def cloud_build_submit( "container", "images", "delete", - f"gcr.io/{project}/{image_name}:latest", + f"gcr.io/{project}/{image_name}-{UUID}:latest", f"--project={project}", "--quiet", ] From ba72e54eae3017f900c346f3b9aa6a4318f1e7c0 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 7 Jun 2021 17:15:11 -0700 Subject: [PATCH 09/87] fix image name --- dataflow/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 97675101450..01917e47bc1 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -194,7 +194,7 @@ def cloud_build_submit( "builds", "submit", f"--project={project}", - f"--tag=gcr.io/{project}/{image_name}:latest", + f"--tag=gcr.io/{project}/{image_name}-{UUID}:latest", *cmd_substitutions, ".", ] From ec4bcdedbdd90b221b291f7c43157507e6f57750 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 12:25:42 -0700 Subject: [PATCH 10/87] fix resource name issues --- dataflow/conftest.py | 82 ++++++------------- .../flex-templates/streaming_beam/e2e_test.py | 4 +- .../gpu-workers/pytorch-minimal/README.md | 3 +- .../gpu-workers/pytorch-minimal/build.yaml | 10 +-- .../gpu-workers/pytorch-minimal/e2e_test.py | 7 +- dataflow/gpu-workers/pytorch-minimal/main.py | 9 +- dataflow/gpu-workers/pytorch-minimal/run.yaml | 9 +- .../gpu-workers/tensorflow-landsat/README.md | 5 +- .../gpu-workers/tensorflow-landsat/build.yaml | 10 +-- .../tensorflow-landsat/e2e_test.py | 10 ++- .../gpu-workers/tensorflow-landsat/main.py | 15 ++-- .../gpu-workers/tensorflow-landsat/run.yaml | 14 ++-- .../gpu-workers/tensorflow-minimal/README.md | 3 +- .../gpu-workers/tensorflow-minimal/build.yaml | 10 +-- .../tensorflow-minimal/e2e_test.py | 7 +- .../gpu-workers/tensorflow-minimal/main.py | 8 +- .../gpu-workers/tensorflow-minimal/run.yaml | 7 +- 17 files changed, 99 insertions(+), 114 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 01917e47bc1..046afb39daa 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -15,6 +15,7 @@ import json import multiprocessing as mp import os +import re import subprocess import sys import time @@ -31,6 +32,9 @@ RETRY_MAX_TIME = 5 * 60 # 5 minutes in seconds +HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+") +UNDERSCORE_NAME_RE = re.compile(r"[^\w\d_]+") + @dataclass class Utils: @@ -40,25 +44,33 @@ class Utils: zone: str = ZONE @staticmethod - def storage_bucket(bucket_name: str) -> str: + def hyphen_name(name: str) -> str: + return f"{HYPHEN_NAME_RE.sub('-', name)}-{UUID}" + + @staticmethod + def underscore_name(name: str) -> str: + return f"{UNDERSCORE_NAME_RE.sub('_', name)}-{UUID}" + + @staticmethod + def storage_bucket(name: str) -> str: from google.cloud import 
storage storage_client = storage.Client() - bucket_unique_name = f"{bucket_name}-{UUID}" - bucket = storage_client.create_bucket(bucket_unique_name) + bucket = storage_client.create_bucket(Utils.hyphen_name(name)) - print(f"storage_bucket: {bucket_unique_name}") - yield bucket_unique_name + print(f"storage_bucket: {bucket.name}") + yield bucket.name bucket.delete(force=True) @staticmethod - def bigquery_dataset(dataset_name: str, project: str = PROJECT) -> str: + def bigquery_dataset(name: str, project: str = PROJECT) -> str: from google.cloud import bigquery bigquery_client = bigquery.Client() + dataset = bigquery_client.create_dataset( - bigquery.Dataset(f"{project}.{dataset_name.replace('-', '_')}_{UUID}") + bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") ) print(f"bigquery_dataset: {dataset.full_dataset_id}") @@ -77,11 +89,11 @@ def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: yield dict(row) @staticmethod - def pubsub_topic(topic_name: str, project: str = PROJECT) -> str: + def pubsub_topic(name: str, project: str = PROJECT) -> str: from google.cloud import pubsub publisher_client = pubsub.PublisherClient() - topic_path = publisher_client.topic_path(project, f"{topic_name}-{UUID}") + topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) topic = publisher_client.create_topic(topic_path) print(f"pubsub_topic: {topic.name}") @@ -98,14 +110,14 @@ def pubsub_topic(topic_name: str, project: str = PROJECT) -> str: @staticmethod def pubsub_subscription( topic_path: str, - subscription_name: str, + name: str, project: str = PROJECT, ) -> str: from google.cloud import pubsub subscriber = pubsub.SubscriberClient() subscription_path = subscriber.subscription_path( - project, f"{subscription_name}-{UUID}" + project, Utils.hyphen_name(name) ) subscription = subscriber.create_subscription(subscription_path, topic_path) @@ -200,7 +212,7 @@ def cloud_build_submit( ] print(cmd) subprocess.run(cmd, check=True) - yield f"gcr.io/{project}/{image_name}-{UUID}:latest" + yield f"{image_name}-{UUID}:latest" else: raise ValueError("must specify either `config` or `image_name`") @@ -217,36 +229,6 @@ def cloud_build_submit( "container", "images", "delete", f"gcr.io/{project}/{image_name}-{UUID}:latest", f"--project={project}", "--quiet", ] print(cmd) subprocess.run(cmd, check=True) - @staticmethod - def dataflow_job_id_from_job_name( - job_name: str, - project: str = PROJECT, - ) -> Optional[str]: - from googleapiclient.discovery import build - - dataflow = build("dataflow", "v1b3") - - # Only return the 50 most recent results - our job is likely to be in here. - # If the job is not found, first try increasing this number. - # For more info see: - # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list - jobs_request = ( - dataflow.projects() - .jobs() - .list( - projectId=project, - filter="ACTIVE", - pageSize=50, - ) - ) - response = jobs_request.execute() - - # Search for the job in the list that has our name (names are unique) - for job in response["jobs"]: - if job["name"] == job_name: - return job["id"] - return None - @staticmethod def dataflow_jobs_wait( job_id: str, @@ -303,20 +285,10 @@ def dataflow_jobs_cancel_by_job_id( ] subprocess.run(cmd, check=True) - @staticmethod - def dataflow_jobs_cancel_by_job_name( - job_name: str, project: str = PROJECT, region: str = REGION - ) -> None: - # To cancel a dataflow job, we need its ID, not its name. - # If it doesn't exist, job_id will be equal to None.
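The payoff of the new `hyphen_name` and `underscore_name` helpers above is that any sample path becomes a valid, collision-free resource name. A self-contained illustration (the UUID value is made up; conftest.py generates one per test session from the `uuid` module):

```python
import re

HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+")
UUID = "1a2b3c"  # hypothetical; the real value comes from uuid.uuid4()

def hyphen_name(name: str) -> str:
    # Collapse each run of disallowed characters into a single hyphen,
    # then append the session UUID so parallel test runs never collide.
    return f"{HYPHEN_NAME_RE.sub('-', name)}-{UUID}"

print(hyphen_name("dataflow/gpu-workers/pytorch-minimal"))
# dataflow-gpu-workers-pytorch-minimal-1a2b3c
```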
- job_id = Utils.dataflow_job_id_from_job_name(project, job_name) - if job_id is not None: - Utils.dataflow_jobs_cancel_by_job_id(job_id, project, region) - @staticmethod def dataflow_flex_template_build( bucket_name: str, - template_image: str, + image_name: str, metadata_file: str, project: str = PROJECT, template_file: str = "template.json", @@ -330,7 +302,7 @@ def dataflow_flex_template_build( "build", template_gcs_path, f"--project={project}", - f"--image={template_image}", + f"--image=gcr.io/{project}/{image_name}", "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] @@ -353,7 +325,7 @@ def dataflow_flex_template_run( import yaml # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run - unique_job_name = f"{job_name}-{UUID}" + unique_job_name = Utils.hyphen_name(job_name) print(f"dataflow_job_name: {unique_job_name}") cmd = [ "gcloud", diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 08f0676b446..ce0ba9193fc 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -21,7 +21,7 @@ Utils = None import pytest -NAME = "dataflow-flex-templates-streaming-beam" +NAME = "dataflow/flex-templates/streaming-beam" @pytest.fixture(scope="session") @@ -66,7 +66,7 @@ def flex_template_image(utils: Utils) -> str: def flex_template_path(utils: Utils, bucket_name: str, flex_template_image: str) -> str: yield from utils.dataflow_flex_template_build( bucket_name=bucket_name, - template_image=flex_template_image, + image_name=flex_template_image, metadata_file="metadata.json", ) diff --git a/dataflow/gpu-workers/pytorch-minimal/README.md b/dataflow/gpu-workers/pytorch-minimal/README.md index 15a81d95fc7..4c8b7fa8370 100644 --- a/dataflow/gpu-workers/pytorch-minimal/README.md +++ b/dataflow/gpu-workers/pytorch-minimal/README.md @@ -27,12 +27,11 @@ with the same Python version as the workers. ```sh export REGION="us-central1" -export WORKER_ZONE="us-central1-f" export GPU_TYPE="nvidia-tesla-t4" gcloud beta builds submit \ --config run.yaml \ - --substitutions _REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --substitutions _REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source ``` diff --git a/dataflow/gpu-workers/pytorch-minimal/build.yaml b/dataflow/gpu-workers/pytorch-minimal/build.yaml index c72876e2623..eed5c16aa70 100644 --- a/dataflow/gpu-workers/pytorch-minimal/build.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/build.yaml @@ -19,14 +19,14 @@ # https://cloud.google.com/build/docs/build-config # ----------------------------------------------------------------------------- +substitutions: + _IMAGE: samples/dataflow/pytorch-gpu:latest + steps: - name: gcr.io/cloud-builders/docker - args: - - build - - --tag=gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest - - . + args: [ build, --tag=gcr.io/$PROJECT_ID/$_IMAGE, . 
] -images: [gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest] +images: [ gcr.io/$PROJECT_ID/$_IMAGE ] options: machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index b1d612593f6..84a965d4a1f 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -22,7 +22,7 @@ Utils = None import pytest -NAME = "dataflow-gpu-pytorch" +NAME = "dataflow/gpu-workers/pytorch-minimal" @pytest.fixture(scope="session") @@ -39,5 +39,8 @@ def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. utils.cloud_build_submit( config="run.yaml", - substitutions={"_TEMP_LOCATION": f"gs://{bucket_name}/temp"}, + substitutions={ + "_IMAGE": worker_image, + "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + }, ) diff --git a/dataflow/gpu-workers/pytorch-minimal/main.py b/dataflow/gpu-workers/pytorch-minimal/main.py index 19b5a740fba..5ae070d7948 100644 --- a/dataflow/gpu-workers/pytorch-minimal/main.py +++ b/dataflow/gpu-workers/pytorch-minimal/main.py @@ -18,6 +18,7 @@ import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.pvalue import AsSingleton import torch @@ -41,7 +42,13 @@ def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: ( pipeline | "Create data" >> beam.Create([input_text]) - | "Check GPU availability" >> beam.Map(check_gpus) + | "Check GPU availability" + >> beam.Map( + lambda x, unused_side_input: x, + unused_side_input=beam.pvalue.AsSingleton( + pipeline | beam.Create([None]) | beam.Map(check_gpus) + ), + ) | "My transform" >> beam.Map(logging.info) ) pipeline.run() diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index c5e74f4d77a..fb915831fe1 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -26,14 +26,14 @@ # ----------------------------------------------------------------------------- substitutions: + _IMAGE: samples/dataflow/pytorch-gpu:latest _TEMP_LOCATION: '' _REGION: us-central1 - _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 _GPU_COUNT: '1' steps: -- name: gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest +- name: gcr.io/$PROJECT_ID/$_IMAGE entrypoint: python args: - /pipeline/main.py @@ -41,9 +41,8 @@ steps: - --project=$PROJECT_ID - --region=$_REGION - --temp_location=$_TEMP_LOCATION - - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/pytorch-gpu:latest - - --worker_zone=$_WORKER_ZONE - - --disk_size_gb=50 + - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE + - --disk_size_gb=20 - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index eeb91e7a5e6..a9b0fd7aa2a 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -33,14 +33,13 @@ We launch the job using the worker image to make sure the job launches with the same Python version as the workers. 
```sh -export GCS_PATH="gs://$BUCKET/samples/dataflow/landsat" +export OUTPUT_PATH="gs://$BUCKET/samples/dataflow/landsat/output-images/" export REGION="us-central1" -export WORKER_ZONE="us-central1-f" export GPU_TYPE="nvidia-tesla-t4" gcloud beta builds submit \ --config run.yaml \ - --substitutions _GCS_PATH=$GCS_PATH,_REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --substitutions _GCS_PATH=$GCS_PATH,_REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source ``` diff --git a/dataflow/gpu-workers/tensorflow-landsat/build.yaml b/dataflow/gpu-workers/tensorflow-landsat/build.yaml index b2b81b8f92d..559452e3868 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/build.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/build.yaml @@ -19,14 +19,14 @@ # https://cloud.google.com/build/docs/build-config # ----------------------------------------------------------------------------- +substitutions: + _IMAGE: samples/dataflow/landsat-gpu:latest + steps: - name: gcr.io/cloud-builders/docker - args: - - build - - --tag=gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest - - . + args: [ build, --tag=gcr.io/$PROJECT_ID/$_IMAGE, . ] -images: [gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest] +images: [ gcr.io/$PROJECT_ID/$_IMAGE ] options: machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index d8e93467fb4..f35530be4f7 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -23,7 +23,7 @@ from google.cloud import storage import pytest -NAME = "dataflow-gpu-landsat" +NAME = "dataflow/gpu-workers/tensorflow-landsat" @pytest.fixture(scope="session") @@ -40,12 +40,16 @@ def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. utils.cloud_build_submit( config="run.yaml", - substitutions={"_GCS_PATH": f"gs://{bucket_name}"}, + substitutions={ + "_IMAGE": worker_image, + "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", + }, ) # Check that output files were created and are not empty. storage_client = storage.Client() output_files = list(storage_client.list_blobs(bucket_name, prefix="outputs/")) - assert len(output_files) > 0, "No output files found" + assert len(output_files) > 0, f"No files found in gs://{bucket_name}/outputs/" for output_file in output_files: assert output_file.size > 0, f"Output file is empty: {output_file.name}" diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index 00216422633..ac28f5ecec2 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -138,15 +138,12 @@ def check_gpus(element: Any, gpus_optional: bool) -> Any: return element -def get_band_paths( - scene: str, band_names: List[str], unused_side_input: Any -) -> Tuple[str, List[str]]: +def get_band_paths(scene: str, band_names: List[str]) -> Tuple[str, List[str]]: """Gets the Cloud Storage paths for each band in a Landsat scene. Args: scene: Landsat 8 scene ID. band_names: List of the band names corresponding to [Red, Green, Blue] channels. - unused_side_input: Used to wait for the GPU check, can be safely ignored. Returns: A (scene, band_paths) pair. 
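The hunk that follows moves the GPU check into a side input. Stripped of the Landsat specifics, the gating idiom looks roughly like this (with `check_gpus` stubbed out): a `beam.Map` cannot fire until its `AsSingleton` side input has been computed, so every element implicitly waits for the one-off check.

```python
import apache_beam as beam

def check_gpus(element):
    # Stand-in for the real check, which raises or warns when the
    # worker has no visible GPU.
    return element

with beam.Pipeline() as pipeline:
    gpu_check = (
        pipeline
        | "Seed" >> beam.Create([None])
        | "Check GPUs" >> beam.Map(check_gpus)
    )
    (
        pipeline
        | "Data" >> beam.Create(["scene-1", "scene-2"])
        | "Gate on GPU check"
        >> beam.Map(
            lambda x, unused_side_input: x,
            unused_side_input=beam.pvalue.AsSingleton(gpu_check),
        )
        | "Process" >> beam.Map(print)
    )
```

Because the side-input branch is seeded with a single element, the check effectively runs once per pipeline rather than once per element.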
@@ -288,16 +285,14 @@ def run( ( pipeline | "Create scene IDs" >> beam.Create(scenes) - | "Get RGB band paths" + | "Check GPU availability" >> beam.Map( - get_band_paths, - rgb_band_names, + lambda x, unused_side_input: x, unused_side_input=beam.pvalue.AsSingleton( - pipeline - | beam.Create([None]) - | "Check GPUs" >> beam.Map(check_gpus, gpus_optional) + pipeline | beam.Create([None]) | beam.Map(check_gpus) ), ) + | "Get RGB band paths" >> beam.Map(get_band_paths, rgb_band_names) | "Load RGB band values" >> beam.MapTuple(load_values) | "Preprocess pixels" >> beam.MapTuple(preprocess_pixels, min_value, max_value, gamma) diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 8b8a6b655cc..022d180ee34 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -26,25 +26,25 @@ # ----------------------------------------------------------------------------- substitutions: - _GCS_PATH: please set --substitutions _GCS_PATH=gs://my-bucket/samples/dataflow/landsat + _OUTPUT_PATH: please run with --substitutions _OUTPUT_PATH=gs://$BUCKET/samples/dataflow/landsat/outputs/ + _IMAGE: samples/dataflow/landsat-gpu:latest + _TEMP_LOCATION: '' _REGION: us-central1 - _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 _GPU_COUNT: '1' steps: -- name: gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest +- name: gcr.io/$PROJECT_ID/$_IMAGE entrypoint: python args: - /pipeline/main.py - - --output-path-prefix=$_GCS_PATH/outputs/ + - --output-path-prefix=$_OUTPUT_PATH - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION - - --temp_location=$_GCS_PATH/temp + - --temp_location=$_TEMP_LOCATION - --worker_machine_type=custom-1-13312-ext - - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/landsat-gpu:latest - - --worker_zone=$_WORKER_ZONE + - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-workers/tensorflow-minimal/README.md index 15a81d95fc7..4c8b7fa8370 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/README.md +++ b/dataflow/gpu-workers/tensorflow-minimal/README.md @@ -27,12 +27,11 @@ with the same Python version as the workers. ```sh export REGION="us-central1" -export WORKER_ZONE="us-central1-f" export GPU_TYPE="nvidia-tesla-t4" gcloud beta builds submit \ --config run.yaml \ - --substitutions _REGION=$REGION,_WORKER_ZONE=$WORKER_ZONE,_GPU_TYPE=$GPU_TYPE \ + --substitutions _REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source ``` diff --git a/dataflow/gpu-workers/tensorflow-minimal/build.yaml b/dataflow/gpu-workers/tensorflow-minimal/build.yaml index 84f60f90255..9362f3c57cb 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/build.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/build.yaml @@ -19,14 +19,14 @@ # https://cloud.google.com/build/docs/build-config # ----------------------------------------------------------------------------- +substitutions: + _IMAGE: samples/dataflow/tensorflow-gpu:latest + steps: - name: gcr.io/cloud-builders/docker - args: - - build - - --tag=gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest - - . + args: [ build, --tag=gcr.io/$PROJECT_ID/$_IMAGE, . 
] -images: [gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest] +images: [ gcr.io/$PROJECT_ID/$_IMAGE ] options: machineType: E2_HIGHCPU_8 diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index de5b46738ae..73547f1a8bf 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -22,7 +22,7 @@ Utils = None import pytest -NAME = "dataflow-gpu-tensorflow" +NAME = "dataflow/gpu-workers/tensorflow-minimal" @pytest.fixture(scope="session") @@ -39,5 +39,8 @@ def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. utils.cloud_build_submit( config="run.yaml", - substitutions={"_TEMP_LOCATION": f"gs://{bucket_name}/temp"}, + substitutions={ + "_IMAGE": worker_image, + "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + }, ) diff --git a/dataflow/gpu-workers/tensorflow-minimal/main.py b/dataflow/gpu-workers/tensorflow-minimal/main.py index 924f4e0935d..33b295ac69b 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/main.py +++ b/dataflow/gpu-workers/tensorflow-minimal/main.py @@ -42,7 +42,13 @@ def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: ( pipeline | "Create data" >> beam.Create([input_text]) - | "Check GPU availability" >> beam.Map(check_gpus) + | "Check GPU availability" + >> beam.Map( + lambda x, unused_side_input: x, + unused_side_input=beam.pvalue.AsSingleton( + pipeline | beam.Create([None]) | beam.Map(check_gpus) + ), + ) | "My transform" >> beam.Map(logging.info) ) pipeline.run() diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index 769ca1eae3f..ca69dcaa353 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -26,14 +26,14 @@ # ----------------------------------------------------------------------------- substitutions: + _IMAGE: samples/dataflow/tensorflow-gpu:latest _TEMP_LOCATION: '' _REGION: us-central1 - _WORKER_ZONE: us-central1-f _GPU_TYPE: nvidia-tesla-t4 _GPU_COUNT: '1' steps: -- name: gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest +- name: gcr.io/$PROJECT_ID/$_IMAGE entrypoint: python args: - /pipeline/main.py @@ -41,8 +41,7 @@ steps: - --project=$PROJECT_ID - --region=$_REGION - --temp_location=$_TEMP_LOCATION - - --worker_harness_container_image=gcr.io/$PROJECT_ID/samples/dataflow/tensorflow-gpu:latest - - --worker_zone=$_WORKER_ZONE + - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 From d24fbf9ebe7e51ff2bf971821af1890f21d2ebc1 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 12:37:45 -0700 Subject: [PATCH 11/87] fix lint issues --- dataflow/conftest.py | 2 +- dataflow/flex-templates/__init__.py | 1 - dataflow/gpu-workers/pytorch-minimal/README.md | 3 --- dataflow/gpu-workers/pytorch-minimal/main.py | 4 ---- dataflow/gpu-workers/tensorflow-landsat/README.md | 3 --- dataflow/gpu-workers/tensorflow-landsat/main.py | 5 ----- dataflow/gpu-workers/tensorflow-minimal/README.md | 3 --- dataflow/gpu-workers/tensorflow-minimal/main.py | 3 --- 8 files changed, 1 insertion(+), 23 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 046afb39daa..bd3e08349be 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -260,7 
+260,7 @@ def dataflow_jobs_wait( print(response) if response["currentState"] == status: return True - except: + except Exception: pass time.sleep(sleep_time_seconds) return False diff --git a/dataflow/flex-templates/__init__.py b/dataflow/flex-templates/__init__.py index 8b137891791..e69de29bb2d 100644 --- a/dataflow/flex-templates/__init__.py +++ b/dataflow/flex-templates/__init__.py @@ -1 +0,0 @@ - diff --git a/dataflow/gpu-workers/pytorch-minimal/README.md b/dataflow/gpu-workers/pytorch-minimal/README.md index 4c8b7fa8370..3ec270791f9 100644 --- a/dataflow/gpu-workers/pytorch-minimal/README.md +++ b/dataflow/gpu-workers/pytorch-minimal/README.md @@ -35,9 +35,6 @@ gcloud beta builds submit \ --no-source ``` -> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job. -> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability). - ## What's next? For a more complete example, take a look at diff --git a/dataflow/gpu-workers/pytorch-minimal/main.py b/dataflow/gpu-workers/pytorch-minimal/main.py index 5ae070d7948..3b36cc0fcd5 100644 --- a/dataflow/gpu-workers/pytorch-minimal/main.py +++ b/dataflow/gpu-workers/pytorch-minimal/main.py @@ -18,7 +18,6 @@ import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.pvalue import AsSingleton import torch @@ -35,9 +34,6 @@ def check_gpus(element: Any, gpus_optional: bool = False) -> Any: def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: beam_options = PipelineOptions(beam_args, save_main_session=True) - - # We currently cannot use the `with` statement to run without waiting. - # https://issues.apache.org/jira/browse/BEAM-12455 pipeline = beam.Pipeline(options=beam_options) ( pipeline diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index a9b0fd7aa2a..e91193b3a48 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -42,6 +42,3 @@ gcloud beta builds submit \ --substitutions _GCS_PATH=$GCS_PATH,_REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source ``` - -> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job. -> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability). diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index ac28f5ecec2..6691c457d9d 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -278,10 +278,6 @@ def run( # We currently cannot use the `with` statement to run without waiting. # https://issues.apache.org/jira/browse/BEAM-12455 pipeline = beam.Pipeline(options=beam_options) - - # Convert Landsat 8 scenes into images. - # ℹ️ We pass `gpu_check` as an unused side input to force that step in - # the pipeline to wait for the check before continuing. 
( pipeline | "Create scene IDs" >> beam.Create(scenes) @@ -305,7 +301,6 @@ def run( ) | "Save to Cloud Storage" >> beam.MapTuple(save_to_gcs, output_path_prefix) ) - pipeline.run() diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-workers/tensorflow-minimal/README.md index 4c8b7fa8370..3ec270791f9 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/README.md +++ b/dataflow/gpu-workers/tensorflow-minimal/README.md @@ -35,9 +35,6 @@ gcloud beta builds submit \ --no-source ``` -> ℹ️ Make sure the GPU type you choose is available in the worker zone for the job. -> For more information, see [GPU availability](https://cloud.google.com/dataflow/docs/resources/locations#gpu_availability). - ## What's next? For a more complete example, take a look at diff --git a/dataflow/gpu-workers/tensorflow-minimal/main.py b/dataflow/gpu-workers/tensorflow-minimal/main.py index 33b295ac69b..f039034349c 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/main.py +++ b/dataflow/gpu-workers/tensorflow-minimal/main.py @@ -35,9 +35,6 @@ def check_gpus(element: Any, gpus_optional: bool = False) -> Any: def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: beam_options = PipelineOptions(beam_args, save_main_session=True) - - # We currently cannot use the `with` statement to run without waiting. - # https://issues.apache.org/jira/browse/BEAM-12455 pipeline = beam.Pipeline(options=beam_options) ( pipeline From d0593cbc79e239b16e95ec6c6ef10eee13d8cfac Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 13:08:01 -0700 Subject: [PATCH 12/87] fix underscore_name --- dataflow/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index bd3e08349be..7042d3f0327 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -49,7 +49,7 @@ def hyphen_name(name: str) -> str: @staticmethod def underscore_name(name: str) -> str: - return f"{UNDERSCORE_NAME_RE.sub('_', name)}-{UUID}" + return f"{UNDERSCORE_NAME_RE.sub('_', name)}_{UUID}" @staticmethod def storage_bucket(name: str) -> str: From 07a4b444f6eef3c0113f4f3db22850f5e155201d Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 13:45:20 -0700 Subject: [PATCH 13/87] wait for jobs --- dataflow/conftest.py | 104 +++++++++++++----- .../flex-templates/streaming_beam/e2e_test.py | 2 +- .../gpu-workers/pytorch-minimal/e2e_test.py | 6 + dataflow/gpu-workers/pytorch-minimal/run.yaml | 2 + .../tensorflow-landsat/e2e_test.py | 6 + .../gpu-workers/tensorflow-landsat/run.yaml | 2 + .../tensorflow-minimal/e2e_test.py | 6 + .../gpu-workers/tensorflow-minimal/run.yaml | 2 + 8 files changed, 102 insertions(+), 28 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 7042d3f0327..c5e9623889a 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -13,13 +13,14 @@ from dataclasses import dataclass import itertools import json +import logging import multiprocessing as mp import os import re import subprocess import sys import time -from typing import Any, Callable, Dict, Iterable, Optional +from typing import Any, Callable, Dict, Iterable, List, Optional, Union import uuid import pytest @@ -230,40 +231,89 @@ def cloud_build_submit( subprocess.run(cmd, check=True) @staticmethod - def dataflow_jobs_wait( - job_id: str, + def dataflow_jobs_get( + job_id: Optional[str] = None, + job_name: Optional[str] = None, project: str = PROJECT, - status: str = "JOB_STATE_RUNNING", - ) -> bool: + region: str = REGION, + 
list_page_size=100, + ) -> Optional[Dict[str, Any]]: from googleapiclient.discovery import build dataflow = build("dataflow", "v1b3") - sleep_time_seconds = 30 - max_sleep_time = 10 * 60 + if job_id: + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/get + request = ( + dataflow.projects() + .jobs() + .get( + projectId=project, + jobId=job_id, + view="JOB_VIEW_SUMMARY", + ) + ) + job = request.execute() + print(job) + return job + + elif job_name: + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list + request = ( + dataflow.projects() + .jobs() + .list( + projectId=project, + filter="ACTIVE", + pageSize=list_page_size, + location=region, + ) + ) + for job in request.execute()["jobs"]: + if job["name"] == job_name: + print(job) + return job + return None + + else: + raise ValueError("must specify either `job_id` or `job_name`") - print(f"Waiting for Dataflow job ID: {job_id} (until status {status})") - for _ in range(0, max_sleep_time, sleep_time_seconds): + @staticmethod + def dataflow_jobs_wait( + job_id: Optional[str] = None, + job_name: Optional[str] = None, + project: str = PROJECT, + region: str = REGION, + until_status: Union[str, Iterable[str]] = { + "JOB_STATE_DONE", + "JOB_STATE_FAILED", + "JOB_STATE_CANCELLED", + }, + timeout_sec: str = 600, + poll_interval_sec=30, + list_page_size=100, + ) -> Optional[str]: + """For a list of all the valid states: + https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState + """ + target_status = ( + {until_status} if isinstance(until_status, str) else set(until_status) + ) + print(f"Waiting for Dataflow job until {target_status}") + status = None + for _ in range(0, timeout_sec, poll_interval_sec): try: - # For more info see: - # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/get - jobs_request = ( - dataflow.projects() - .jobs() - .get( - projectId=project, - jobId=job_id, - view="JOB_VIEW_SUMMARY", - ) + status = Utils.dataflow_jobs_get( + job_id, job_name, project, region, list_page_size ) - response = jobs_request.execute() - print(response) - if response["currentState"] == status: - return True - except Exception: - pass - time.sleep(sleep_time_seconds) - return False + if status in target_status: + return status + except Exception as e: + logging.warning(e) + time.sleep(poll_interval_sec) + return status @staticmethod def dataflow_jobs_cancel_by_job_id( diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index ce0ba9193fc..b5816c58c78 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -93,7 +93,7 @@ def test_flex_template_run( # Since this is a streaming job, it will never finish running. # First, lets wait until the job is running. - utils.dataflow_jobs_wait(job_id) + utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") # Then, wait a minute for data to arrive, get processed, and cancel it. 
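At its core, the `dataflow_jobs_wait` helper added above is a bounded polling loop over the Dataflow REST API. Reduced to essentials (a sketch: the job-name lookup, retries, and logging are omitted, and the state field is `currentState`, the name this series eventually settles on):

```python
import time
from googleapiclient.discovery import build

def wait_for_state(project: str, job_id: str, targets: set,
                   timeout_sec: int = 600, poll_sec: int = 30) -> str:
    dataflow = build("dataflow", "v1b3")
    for _ in range(0, timeout_sec, poll_sec):
        job = (
            dataflow.projects()
            .jobs()
            .get(projectId=project, jobId=job_id, view="JOB_VIEW_SUMMARY")
            .execute()
        )
        if job["currentState"] in targets:
            return job["currentState"]
        time.sleep(poll_sec)
    raise TimeoutError(f"job {job_id} never reached {targets} in {timeout_sec}s")
```

The real helper layers name-based lookup and exception handling on top of this loop.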
time.sleep(60) diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index 84a965d4a1f..8d6eee7253e 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -37,10 +37,16 @@ def worker_image(utils: Utils) -> str: def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. + job_name = utils.hyphen_name(NAME) utils.cloud_build_submit( config="run.yaml", substitutions={ "_IMAGE": worker_image, + "_JOB_NAME": job_name, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) + + # Wait until the job finishes. + status = utils.dataflow_jobs_wait(job_name=job_name) + assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index fb915831fe1..7e352a7c924 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -27,6 +27,7 @@ substitutions: _IMAGE: samples/dataflow/pytorch-gpu:latest + _JOB_NAME: '' _TEMP_LOCATION: '' _REGION: us-central1 _GPU_TYPE: nvidia-tesla-t4 @@ -40,6 +41,7 @@ steps: - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --disk_size_gb=20 diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index f35530be4f7..9b4007ce9c6 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -38,15 +38,21 @@ def worker_image(utils: Utils) -> str: def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. + job_name = utils.hyphen_name(NAME) utils.cloud_build_submit( config="run.yaml", substitutions={ "_IMAGE": worker_image, + "_JOB_NAME": job_name, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", }, ) + # Wait until the job finishes. + status = utils.dataflow_jobs_wait(job_name=job_name) + assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" + # Check that output files were created and are not empty. 
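A side note on why every `_JOB_NAME` is derived with `hyphen_name` instead of the raw sample path: Dataflow job names may only contain lowercase letters, digits, and hyphens, and must start with a letter. Roughly (the length limit is not checked here):

```python
import re

# Approximate Dataflow job-name rule.
JOB_NAME_RE = re.compile(r"^[a-z]([-a-z0-9]*[a-z0-9])?$")

assert JOB_NAME_RE.match("dataflow-gpu-workers-pytorch-minimal-1a2b3c")
assert not JOB_NAME_RE.match("dataflow/gpu-workers/pytorch-minimal")  # slashes are invalid
```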
storage_client = storage.Client() output_files = list(storage_client.list_blobs(bucket_name, prefix="outputs/")) diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 022d180ee34..6eb8264e7db 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -28,6 +28,7 @@ substitutions: _OUTPUT_PATH: please run with --substitutions _OUTPUT_PATH=gs://$BUCKET/samples/dataflow/landsat/outputs/ _IMAGE: samples/dataflow/landsat-gpu:latest + _JOB_NAME: '' _TEMP_LOCATION: '' _REGION: us-central1 _GPU_TYPE: nvidia-tesla-t4 @@ -42,6 +43,7 @@ steps: - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - --worker_machine_type=custom-1-13312-ext - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index 73547f1a8bf..bce098c9cef 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -37,10 +37,16 @@ def worker_image(utils: Utils) -> str: def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: # Run the Beam pipeline in Dataflow making sure GPUs are used. + job_name = utils.hyphen_name(NAME) utils.cloud_build_submit( config="run.yaml", substitutions={ "_IMAGE": worker_image, + "_JOB_NAME": job_name, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) + + # Wait until the job finishes. + status = utils.dataflow_jobs_wait(job_name=job_name) + assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index ca69dcaa353..75178e5d54c 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -27,6 +27,7 @@ substitutions: _IMAGE: samples/dataflow/tensorflow-gpu:latest + _JOB_NAME: '' _TEMP_LOCATION: '' _REGION: us-central1 _GPU_TYPE: nvidia-tesla-t4 @@ -40,6 +41,7 @@ steps: - --runner=DataflowRunner - --project=$PROJECT_ID - --region=$_REGION + - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver From 17258d4d7ee27fcac7032ba16e5544af214a7d96 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 14:32:37 -0700 Subject: [PATCH 14/87] fix test requirements --- dataflow/conftest.py | 6 +++--- dataflow/gpu-workers/pytorch-minimal/requirements-test.txt | 3 +++ .../gpu-workers/tensorflow-landsat/requirements-test.txt | 1 + .../gpu-workers/tensorflow-minimal/requirements-test.txt | 3 +++ 4 files changed, 10 insertions(+), 3 deletions(-) create mode 100644 dataflow/gpu-workers/pytorch-minimal/requirements-test.txt create mode 100644 dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt diff --git a/dataflow/conftest.py b/dataflow/conftest.py index c5e9623889a..c92e1288556 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -207,7 +207,7 @@ def cloud_build_submit( "builds", "submit", f"--project={project}", - f"--tag=gcr.io/{project}/{image_name}-{UUID}:latest", + f"--tag=gcr.io//{project}/{image_name}-{UUID}:latest", *cmd_substitutions, ".", ] @@ -223,7 +223,7 @@ def cloud_build_submit( "container", "images", "delete", - 
f"gcr.io/{project}/{image_name}-{UUID}:latest", + f"gcr.io//{project}/{image_name}-{UUID}:latest", f"--project={project}", "--quiet", ] @@ -352,7 +352,7 @@ def dataflow_flex_template_build( "build", template_gcs_path, f"--project={project}", - f"--image=gcr.io/{project}/{image_name}", + f"--image=gcr.io//{project}/{image_name}", "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] diff --git a/dataflow/gpu-workers/pytorch-minimal/requirements-test.txt b/dataflow/gpu-workers/pytorch-minimal/requirements-test.txt new file mode 100644 index 00000000000..4a9e35e0e25 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/requirements-test.txt @@ -0,0 +1,3 @@ +google-api-python-client==2.1.0 +google-cloud-storage==1.38.0 +pytest==6.2.4 diff --git a/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt b/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt index 9782f5d8d54..4a9e35e0e25 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt +++ b/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt @@ -1,2 +1,3 @@ +google-api-python-client==2.1.0 google-cloud-storage==1.38.0 pytest==6.2.4 diff --git a/dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt b/dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt new file mode 100644 index 00000000000..4a9e35e0e25 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt @@ -0,0 +1,3 @@ +google-api-python-client==2.1.0 +google-cloud-storage==1.38.0 +pytest==6.2.4 From dabb8ffe2ca5e81f05ce53d20632c32c495c1207 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 15:46:09 -0700 Subject: [PATCH 15/87] fix image prefix --- dataflow/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index c92e1288556..c5e9623889a 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -207,7 +207,7 @@ def cloud_build_submit( "builds", "submit", f"--project={project}", - f"--tag=gcr.io//{project}/{image_name}-{UUID}:latest", + f"--tag=gcr.io/{project}/{image_name}-{UUID}:latest", *cmd_substitutions, ".", ] @@ -223,7 +223,7 @@ def cloud_build_submit( "container", "images", "delete", - f"gcr.io//{project}/{image_name}-{UUID}:latest", + f"gcr.io/{project}/{image_name}-{UUID}:latest", f"--project={project}", "--quiet", ] @@ -352,7 +352,7 @@ def dataflow_flex_template_build( "build", template_gcs_path, f"--project={project}", - f"--image=gcr.io//{project}/{image_name}", + f"--image=gcr.io/{project}/{image_name}", "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] From ffa0476abf4e57a39844588d3783ee953e4663ad Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 9 Jun 2021 17:20:55 -0700 Subject: [PATCH 16/87] get job status for wait --- dataflow/conftest.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index c5e9623889a..2b8729fcbe0 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -305,9 +305,10 @@ def dataflow_jobs_wait( status = None for _ in range(0, timeout_sec, poll_interval_sec): try: - status = Utils.dataflow_jobs_get( + job = Utils.dataflow_jobs_get( job_id, job_name, project, region, list_page_size ) + status = job["currentStatus"] if status in target_status: return status except Exception as e: From 07d1395d850ff2433e0f9c2baa57facf4cd7550a Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 11:29:03 -0700 Subject: [PATCH 17/87] use correct region --- 
dataflow/conftest.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 2b8729fcbe0..5c8260dd2dd 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -225,6 +225,7 @@ def cloud_build_submit( "delete", f"gcr.io/{project}/{image_name}-{UUID}:latest", f"--project={project}", + "--force-delete-tags", "--quiet", ] print(cmd) @@ -271,7 +272,9 @@ def dataflow_jobs_get( location=region, ) ) - for job in request.execute()["jobs"]: + response = request.execute() + print(response) + for job in response["jobs"]: if job["name"] == job_name: print(job) return job @@ -312,7 +315,7 @@ def dataflow_jobs_wait( if status in target_status: return status except Exception as e: - logging.warning(e) + logging.exception(e) time.sleep(poll_interval_sec) return status From 6c87851c2ad7bae9ddd8a9468cd477d0a43f2e62 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 11:29:56 -0700 Subject: [PATCH 18/87] make jobs list not region dependent --- dataflow/conftest.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 5c8260dd2dd..a876a054b7a 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -236,7 +236,6 @@ def dataflow_jobs_get( job_id: Optional[str] = None, job_name: Optional[str] = None, project: str = PROJECT, - region: str = REGION, list_page_size=100, ) -> Optional[Dict[str, Any]]: from googleapiclient.discovery import build @@ -269,7 +268,6 @@ def dataflow_jobs_get( projectId=project, filter="ACTIVE", pageSize=list_page_size, - location=region, ) ) response = request.execute() From 0d76f0beebf7758b1c3b302bc8d8c7df1ac88b18 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 11:40:00 -0700 Subject: [PATCH 19/87] use uuid for image tags --- dataflow/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index a876a054b7a..90271c45f61 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -207,13 +207,13 @@ def cloud_build_submit( "builds", "submit", f"--project={project}", - f"--tag=gcr.io/{project}/{image_name}-{UUID}:latest", + f"--tag=gcr.io/{project}/{image_name}:{UUID}", *cmd_substitutions, ".", ] print(cmd) subprocess.run(cmd, check=True) - yield f"{image_name}-{UUID}:latest" + yield f"{image_name}:{UUID}" else: raise ValueError("must specify either `config` or `image_name`") @@ -223,7 +223,7 @@ def cloud_build_submit( "container", "images", "delete", - f"gcr.io/{project}/{image_name}-{UUID}:latest", + f"gcr.io/{project}/{image_name}:{UUID}", f"--project={project}", "--force-delete-tags", "--quiet", From ab9d03924fd61d4e84b2b9d4f6b0a717a47f2a1a Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 11:42:32 -0700 Subject: [PATCH 20/87] update title --- dataflow/gpu-workers/tensorflow-landsat/README.md | 2 +- dataflow/gpu-workers/tensorflow-minimal/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index e91193b3a48..003dfef7294 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -1,4 +1,4 @@ -# Workers with GPUs +# Processing Landsat satellite images with GPUs [![Open in Cloud 
Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataflow/gpu-workers/README.md) diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-workers/tensorflow-minimal/README.md index 3ec270791f9..a645dbb411b 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/README.md +++ b/dataflow/gpu-workers/tensorflow-minimal/README.md @@ -1,4 +1,4 @@ -# PyTorch GPU minimal pipeline +# TensorFlow GPU minimal pipeline ## Before you begin From 37816360002adef1ebe01d9dcf3cdaad42b6ec98 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 11:44:06 -0700 Subject: [PATCH 21/87] fix run command --- dataflow/gpu-workers/tensorflow-landsat/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index 003dfef7294..7f826e9c00c 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -39,6 +39,6 @@ export GPU_TYPE="nvidia-tesla-t4" gcloud beta builds submit \ --config run.yaml \ - --substitutions _GCS_PATH=$GCS_PATH,_REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ + --substitutions _OUTPUT_PATH=$OUTPUT_PATH,_REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source ``` From f1bd64ec62f502f3cc648f1a7c0d78c7ce5dbb9a Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 12:16:48 -0700 Subject: [PATCH 22/87] remove region from job get call --- dataflow/conftest.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 90271c45f61..faa5e60a308 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -307,7 +307,10 @@ def dataflow_jobs_wait( for _ in range(0, timeout_sec, poll_interval_sec): try: job = Utils.dataflow_jobs_get( - job_id, job_name, project, region, list_page_size + job_id=job_id, + job_name=job_name, + project=project, + list_page_size=list_page_size, ) status = job["currentStatus"] if status in target_status: From 6e3a4f69b4e14b92f080db724eb816bf7ea31247 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 10 Jun 2021 13:50:44 -0700 Subject: [PATCH 23/87] launch jobs from fixtures --- dataflow/conftest.py | 6 ++++-- dataflow/gpu-workers/pytorch-minimal/e2e_test.py | 15 +++++++++++---- .../gpu-workers/tensorflow-landsat/e2e_test.py | 15 +++++++++++---- .../gpu-workers/tensorflow-minimal/e2e_test.py | 15 +++++++++++---- 4 files changed, 37 insertions(+), 14 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index faa5e60a308..f32907442c9 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -180,6 +180,7 @@ def cloud_build_submit( """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" cmd = ["gcloud", "auth", "configure-docker"] print(cmd) + subprocess.run(cmd, check=True) if substitutions: cmd_substitutions = [ @@ -188,7 +189,6 @@ def cloud_build_submit( else: cmd_substitutions = [] - subprocess.run(cmd, check=True) if config: cmd = [ "gcloud", @@ -318,7 +318,9 @@ def dataflow_jobs_wait( except Exception as e: logging.exception(e) time.sleep(poll_interval_sec) - return status + raise RuntimeError( + f"Dataflow job not found, job_id={job_id}, job_name={job_name}" + ) @staticmethod def dataflow_jobs_cancel_by_job_id( diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py 
b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index 8d6eee7253e..b40cd9a2fe8 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -35,18 +35,25 @@ def worker_image(utils: Utils) -> str: yield from utils.cloud_build_submit(NAME, config="build.yaml") -def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: +@pytest.fixture(scope="session") +def job_name(utils: Utils) -> str: + yield utils.hyphen_name(NAME) + + +@pytest.fixture(scope="session") +def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. - job_name = utils.hyphen_name(NAME) - utils.cloud_build_submit( + yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_IMAGE": worker_image, "_JOB_NAME": job_name, + "_IMAGE": worker_image, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) + +def test_pytorch_minimal(utils: Utils, job_name: str, run_job: str) -> None: # Wait until the job finishes. status = utils.dataflow_jobs_wait(job_name=job_name) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index 9b4007ce9c6..555a99bab42 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -36,19 +36,26 @@ def worker_image(utils: Utils) -> str: yield from utils.cloud_build_submit(NAME, config="build.yaml") -def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: +@pytest.fixture(scope="session") +def job_name(utils: Utils) -> str: + yield utils.hyphen_name(NAME) + + +@pytest.fixture(scope="session") +def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. - job_name = utils.hyphen_name(NAME) - utils.cloud_build_submit( + yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_IMAGE": worker_image, "_JOB_NAME": job_name, + "_IMAGE": worker_image, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", }, ) + +def test_tensorflow_landsat(utils: Utils, job_name: str, run_job: str) -> None: # Wait until the job finishes. status = utils.dataflow_jobs_wait(job_name=job_name) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index bce098c9cef..6b5025dd5ad 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -35,18 +35,25 @@ def worker_image(utils: Utils) -> str: yield from utils.cloud_build_submit(NAME, config="build.yaml") -def test_end_to_end(utils: Utils, bucket_name: str, worker_image: str) -> None: +@pytest.fixture(scope="session") +def job_name(utils: Utils) -> str: + yield utils.hyphen_name(NAME) + + +@pytest.fixture(scope="session") +def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. 
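These refactored tests lean on pytest's session-scoped yield fixtures: everything before the `yield` is setup, everything after it is teardown, and fixture dependencies impose a build-then-run ordering. A stripped-down sketch of the shape (the helper bodies are stand-ins):

```python
import pytest

def build_image() -> str:
    return "gcr.io/my-project/sample:1a2b3c"  # stand-in for a Cloud Build submit

@pytest.fixture(scope="session")
def worker_image() -> str:
    image = build_image()                # setup: runs once per test session
    yield image                          # dependent fixtures and tests run here
    print(f"teardown: delete {image}")   # cleanup: runs after the session ends

@pytest.fixture(scope="session")
def run_job(worker_image: str) -> str:
    # Depending on worker_image guarantees the image exists before the job
    # launches, and that it is only cleaned up after the job finishes.
    yield f"job-using-{worker_image}"
```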
- job_name = utils.hyphen_name(NAME) - utils.cloud_build_submit( + yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_IMAGE": worker_image, "_JOB_NAME": job_name, + "_IMAGE": worker_image, "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) + +def test_tensorflow_minimal(utils: Utils, job_name: str, run_job: str) -> None: # Wait until the job finishes. status = utils.dataflow_jobs_wait(job_name=job_name) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" From b66bf1ef518c839ddc949e7d57b6246e2e68e9d2 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 10:13:51 -0700 Subject: [PATCH 24/87] fix resource names --- dataflow/conftest.py | 5 +++-- dataflow/flex-templates/streaming_beam/e2e_test.py | 8 +++++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index f32907442c9..b0d4d093e60 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -312,7 +312,7 @@ def dataflow_jobs_wait( project=project, list_page_size=list_page_size, ) - status = job["currentStatus"] + status = job["currentState"] if status in target_status: return status except Exception as e: @@ -323,7 +323,7 @@ def dataflow_jobs_wait( ) @staticmethod - def dataflow_jobs_cancel_by_job_id( + def dataflow_jobs_cancel( job_id: str, project: str = PROJECT, region: str = REGION ) -> None: print(f"Canceling Dataflow job ID: {job_id}") @@ -340,6 +340,7 @@ def dataflow_jobs_cancel_by_job_id( job_id, f"--region={region}", ] + print(cmd) subprocess.run(cmd, check=True) @staticmethod diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index b5816c58c78..e1f6badfa0f 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -10,6 +10,7 @@ # distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from conftest import PROJECT import json import time @@ -78,6 +79,7 @@ def test_flex_template_run( pubsub_subscription: str, flex_template_path: str, bigquery_dataset: str, + project: str = PROJECT, ) -> None: bigquery_table = "output_table" @@ -86,8 +88,8 @@ def test_flex_template_run( template_path=flex_template_path, bucket_name=bucket_name, parameters={ - "input_subscription": pubsub_subscription, - "output_table": f"{bigquery_dataset}.{bigquery_table}", + "input_subscription": f"projects/{project}/subscriptions/{pubsub_subscription}", + "output_table": f"{project}:{bigquery_dataset}.{bigquery_table}", }, ) @@ -97,7 +99,7 @@ def test_flex_template_run( # Then, wait a minute for data to arrive, get processed, and cancel it. time.sleep(60) - utils.dataflow_jobs_cancel_by_job_id(job_id) + utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. 
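The parameter strings assembled above follow the fully qualified formats the Flex Template and BigQuery expect. Spelled out with hypothetical values (note the colon in BigQuery's legacy table form, which is why the query below swaps it for a dot):

```python
project = "my-project"  # hypothetical values throughout
subscription = "messages-1a2b3c"
dataset, table = "beam_samples_1a2b3c", "output_table"

input_subscription = f"projects/{project}/subscriptions/{subscription}"
output_table = f"{project}:{dataset}.{table}"  # legacy colon-separated form
query_table = output_table.replace(":", ".")   # Standard SQL wants dots only
print(query_table)  # my-project.beam_samples_1a2b3c.output_table
```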
query = f"SELECT * FROM {bigquery_dataset.replace(':', '.')}.{bigquery_table}" From 45ba754baf94bcdee732d55b6df10a9c83439328 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 10:53:43 -0700 Subject: [PATCH 25/87] use unique images --- dataflow/conftest.py | 23 ++++++++++--------- .../gpu-workers/pytorch-minimal/e2e_test.py | 23 +++++++++---------- .../tensorflow-landsat/e2e_test.py | 23 +++++++++---------- .../tensorflow-minimal/e2e_test.py | 23 +++++++++---------- 4 files changed, 45 insertions(+), 47 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index b0d4d093e60..1c6cf467dce 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -190,17 +190,18 @@ def cloud_build_submit( cmd_substitutions = [] if config: - cmd = [ - "gcloud", - "builds", - "submit", - f"--project={project}", - f"--config={config}", - *cmd_substitutions, - ] - print(cmd) - subprocess.run(cmd, check=True) - yield config + with open(config) as f: + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--config={config}", + *cmd_substitutions, + ] + print(cmd) + subprocess.run(cmd, check=True) + yield f.read() elif image_name: cmd = [ "gcloud", diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index b40cd9a2fe8..baf6e161617 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -31,29 +31,28 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") -def worker_image(utils: Utils) -> str: - yield from utils.cloud_build_submit(NAME, config="build.yaml") - - -@pytest.fixture(scope="session") -def job_name(utils: Utils) -> str: - yield utils.hyphen_name(NAME) +def build_image(utils: Utils) -> str: + yield from utils.cloud_build_submit( + NAME, + config="build.yaml", + substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, + ) @pytest.fixture(scope="session") -def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: +def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_JOB_NAME": job_name, - "_IMAGE": worker_image, + "_JOB_NAME": utils.hyphen_name(NAME), + "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) -def test_pytorch_minimal(utils: Utils, job_name: str, run_job: str) -> None: +def test_pytorch_minimal(utils: Utils, run_job: str) -> None: # Wait until the job finishes. 
- status = utils.dataflow_jobs_wait(job_name=job_name) + status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index 555a99bab42..f94c9764be4 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -32,32 +32,31 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") -def worker_image(utils: Utils) -> str: - yield from utils.cloud_build_submit(NAME, config="build.yaml") - - -@pytest.fixture(scope="session") -def job_name(utils: Utils) -> str: - yield utils.hyphen_name(NAME) +def build_image(utils: Utils) -> str: + yield from utils.cloud_build_submit( + NAME, + config="build.yaml", + substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, + ) @pytest.fixture(scope="session") -def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: +def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_JOB_NAME": job_name, - "_IMAGE": worker_image, + "_JOB_NAME": utils.hyphen_name(NAME), + "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", }, ) -def test_tensorflow_landsat(utils: Utils, job_name: str, run_job: str) -> None: +def test_tensorflow_landsat(utils: Utils, run_job: str) -> None: # Wait until the job finishes. - status = utils.dataflow_jobs_wait(job_name=job_name) + status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" # Check that output files were created and are not empty. diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index 6b5025dd5ad..33ddfc71463 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -31,29 +31,28 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") -def worker_image(utils: Utils) -> str: - yield from utils.cloud_build_submit(NAME, config="build.yaml") - - -@pytest.fixture(scope="session") -def job_name(utils: Utils) -> str: - yield utils.hyphen_name(NAME) +def build_image(utils: Utils) -> str: + yield from utils.cloud_build_submit( + NAME, + config="build.yaml", + substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, + ) @pytest.fixture(scope="session") -def run_job(utils: Utils, job_name: str, bucket_name: str, worker_image: str) -> str: +def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", substitutions={ - "_JOB_NAME": job_name, - "_IMAGE": worker_image, + "_JOB_NAME": utils.hyphen_name(NAME), + "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", }, ) -def test_tensorflow_minimal(utils: Utils, job_name: str, run_job: str) -> None: +def test_tensorflow_minimal(utils: Utils, run_job: str) -> None: # Wait until the job finishes. 
- status = utils.dataflow_jobs_wait(job_name=job_name) + status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" From c452cc41630ed922624151e9e77c77edaf5eeda4 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 12:19:39 -0700 Subject: [PATCH 26/87] fix lint issues --- dataflow/conftest.py | 2 +- dataflow/flex-templates/streaming_beam/e2e_test.py | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 1c6cf467dce..2794a92a9f8 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -20,7 +20,7 @@ import subprocess import sys import time -from typing import Any, Callable, Dict, Iterable, List, Optional, Union +from typing import Any, Callable, Dict, Iterable, Optional, Union import uuid import pytest diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index e1f6badfa0f..bebd2eb08ac 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -10,7 +10,6 @@ # distributed under the License is distributed on an 'AS IS' BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -from conftest import PROJECT import json import time @@ -79,7 +78,6 @@ def test_flex_template_run( pubsub_subscription: str, flex_template_path: str, bigquery_dataset: str, - project: str = PROJECT, ) -> None: bigquery_table = "output_table" @@ -88,8 +86,8 @@ def test_flex_template_run( template_path=flex_template_path, bucket_name=bucket_name, parameters={ - "input_subscription": f"projects/{project}/subscriptions/{pubsub_subscription}", - "output_table": f"{project}:{bigquery_dataset}.{bigquery_table}", + "input_subscription": f"projects/{utils.project}/subscriptions/{pubsub_subscription}", + "output_table": f"{utils.kproject}:{bigquery_dataset}.{bigquery_table}", }, ) From 67fa556420ec1702253c9e3622ae61abd83f8820 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 13:16:30 -0700 Subject: [PATCH 27/87] pass --no-source when running the job --- dataflow/conftest.py | 4 +++- dataflow/gpu-workers/pytorch-minimal/e2e_test.py | 4 +++- dataflow/gpu-workers/pytorch-minimal/run.yaml | 1 + dataflow/gpu-workers/tensorflow-landsat/e2e_test.py | 4 +++- dataflow/gpu-workers/tensorflow-landsat/run.yaml | 1 + dataflow/gpu-workers/tensorflow-minimal/e2e_test.py | 4 +++- dataflow/gpu-workers/tensorflow-minimal/run.yaml | 1 + 7 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 2794a92a9f8..bf1918f67de 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -174,6 +174,7 @@ def _infinite_publish_job() -> None: def cloud_build_submit( image_name: Optional[str] = None, config: Optional[str] = None, + source: str = ".", substitutions: Optional[Dict[str, str]] = None, project: str = PROJECT, ) -> None: @@ -198,6 +199,7 @@ def cloud_build_submit( f"--project={project}", f"--config={config}", *cmd_substitutions, + source, ] print(cmd) subprocess.run(cmd, check=True) @@ -210,7 +212,7 @@ def cloud_build_submit( f"--project={project}", f"--tag=gcr.io/{project}/{image_name}:{UUID}", *cmd_substitutions, - ".", + source, ] print(cmd) subprocess.run(cmd, check=True) diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index baf6e161617..c24ee471e5d 100644 --- 
a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -33,7 +33,7 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( - NAME, + image_name=NAME, config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -48,7 +48,9 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + "_REGION": utils.region, }, + source="--no-source", ) diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index 7e352a7c924..95508c9fb86 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -51,4 +51,5 @@ steps: options: logging: CLOUD_LOGGING_ONLY +# Use the Compute Engine default service account to launch the job. serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index f94c9764be4..6f9d58a5760 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -34,7 +34,7 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( - NAME, + image_name=NAME, config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -49,8 +49,10 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + "_REGION": utils.region, "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", }, + source="--no-source", ) diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 6eb8264e7db..416a2f9e6e1 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -53,4 +53,5 @@ steps: options: logging: CLOUD_LOGGING_ONLY +# Use the Compute Engine default service account to launch the job. 
serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index 33ddfc71463..ebac5c00dd7 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -33,7 +33,7 @@ def bucket_name(utils: Utils) -> str: @pytest.fixture(scope="session") def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( - NAME, + image_name=NAME, config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -48,7 +48,9 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", + "_REGION": utils.region, }, + source="--no-source", ) diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index 75178e5d54c..337634629e8 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -50,4 +50,5 @@ steps: options: logging: CLOUD_LOGGING_ONLY +# Use the Compute Engine default service account to launch the job. serviceAccount: projects/$PROJECT_ID/serviceAccounts/$PROJECT_NUMBER-compute@developer.gserviceaccount.com From 7aabf8b53dd7a618f9e0e1d30a9cce4c56b58665 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 14:58:18 -0700 Subject: [PATCH 28/87] more logging and changed region --- dataflow/conftest.py | 30 ++++++++++++------- .../flex-templates/streaming_beam/e2e_test.py | 2 +- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index bf1918f67de..f6a5bb977ca 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -28,8 +28,7 @@ # Default options. 
UUID = uuid.uuid4().hex[0:6] PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] -REGION = "us-west1" -ZONE = "us-west1-b" +REGION = "us-central1" RETRY_MAX_TIME = 5 * 60 # 5 minutes in seconds @@ -42,7 +41,6 @@ class Utils: uuid: str = UUID project: str = PROJECT region: str = REGION - zone: str = ZONE @staticmethod def hyphen_name(name: str) -> str: @@ -59,10 +57,11 @@ def storage_bucket(name: str) -> str: storage_client = storage.Client() bucket = storage_client.create_bucket(Utils.hyphen_name(name)) - print(f"storage_bucket: {bucket.name}") + print(f"Created storage_bucket: {bucket.name}") yield bucket.name bucket.delete(force=True) + print(f"Deleted storage_bucket: {bucket.name}") @staticmethod def bigquery_dataset(name: str, project: str = PROJECT) -> str: @@ -74,12 +73,13 @@ def bigquery_dataset(name: str, project: str = PROJECT) -> str: bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") ) - print(f"bigquery_dataset: {dataset.full_dataset_id}") + print(f"Created bigquery_dataset: {dataset.full_dataset_id}") yield dataset.full_dataset_id bigquery_client.delete_dataset( dataset.full_dataset_id.replace(":", "."), delete_contents=True ) + print(f"Deleted bigquery_dataset: {dataset.full_dataset_id}") @staticmethod def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: @@ -97,7 +97,7 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) topic = publisher_client.create_topic(topic_path) - print(f"pubsub_topic: {topic.name}") + print(f"Created pubsub_topic: {topic.name}") yield topic.name # Due to the pinned library dependencies in apache-beam, client @@ -107,6 +107,7 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: cmd = ["gcloud", "pubsub", "--project", project, "topics", "delete", topic.name] print(cmd) subprocess.run(cmd, check=True) + print(f"Deleted pubsub_topic: {topic.name}") @staticmethod def pubsub_subscription( @@ -122,7 +123,7 @@ def pubsub_subscription( ) subscription = subscriber.create_subscription(subscription_path, topic_path) - print(f"pubsub_subscription: {subscription.name}") + print(f"Created pubsub_subscription: {subscription.name}") yield subscription.name # Due to the pinned library dependencies in apache-beam, client @@ -140,6 +141,7 @@ def pubsub_subscription( ] print(cmd) subprocess.run(cmd, check=True) + print(f"Deleted pubsub_subscription: {subscription.name}") @staticmethod def pubsub_publisher( @@ -203,6 +205,7 @@ def cloud_build_submit( ] print(cmd) subprocess.run(cmd, check=True) + print(f"Cloud build finished successfully: {config}") yield f.read() elif image_name: cmd = [ @@ -216,6 +219,7 @@ def cloud_build_submit( ] print(cmd) subprocess.run(cmd, check=True) + print(f"Created image: gcr.io/{project}/{image_name}:{UUID}") yield f"{image_name}:{UUID}" else: raise ValueError("must specify either `config` or `image_name`") @@ -233,6 +237,7 @@ def cloud_build_submit( ] print(cmd) subprocess.run(cmd, check=True) + print(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") @staticmethod def dataflow_jobs_get( @@ -258,7 +263,7 @@ def dataflow_jobs_get( ) ) job = request.execute() - print(job) + print(f"Dataflow job: {job}") return job elif job_name: @@ -274,10 +279,10 @@ def dataflow_jobs_get( ) ) response = request.execute() - print(response) + print(f"Finding job {job_name}, response={response}") for job in response["jobs"]: if job["name"] == job_name: - print(job) + print(f"Dataflow job: {job}") return job return None @@ -305,7 +310,9 @@ def 
dataflow_jobs_wait( target_status = ( {until_status} if isinstance(until_status, str) else set(until_status) ) - print(f"Waiting for Dataflow job until {target_status}") + print( + f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" + ) status = None for _ in range(0, timeout_sec, poll_interval_sec): try: @@ -345,6 +352,7 @@ def dataflow_jobs_cancel( ] print(cmd) subprocess.run(cmd, check=True) + print(f"Cancelled Dataflow job: {job_id}") @staticmethod def dataflow_flex_template_build( diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index bebd2eb08ac..561f0c9af10 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -87,7 +87,7 @@ def test_flex_template_run( bucket_name=bucket_name, parameters={ "input_subscription": f"projects/{utils.project}/subscriptions/{pubsub_subscription}", - "output_table": f"{utils.kproject}:{bigquery_dataset}.{bigquery_table}", + "output_table": f"{utils.project}:{bigquery_dataset}.{bigquery_table}", }, ) From 5f58ba746cbff034404ce71149f6575c495ee31f Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 15:48:38 -0700 Subject: [PATCH 29/87] simplified type hints --- dataflow/gpu-workers/pytorch-minimal/main.py | 5 +-- .../pytorch-minimal/noxfile_config.py | 38 +++++++++++++++++++ .../gpu-workers/tensorflow-landsat/main.py | 35 ++++------------- .../gpu-workers/tensorflow-minimal/main.py | 5 +-- .../tensorflow-minimal/noxfile_config.py | 38 +++++++++++++++++++ 5 files changed, 88 insertions(+), 33 deletions(-) create mode 100644 dataflow/gpu-workers/pytorch-minimal/noxfile_config.py create mode 100644 dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py diff --git a/dataflow/gpu-workers/pytorch-minimal/main.py b/dataflow/gpu-workers/pytorch-minimal/main.py index 3b36cc0fcd5..b939b33fa53 100644 --- a/dataflow/gpu-workers/pytorch-minimal/main.py +++ b/dataflow/gpu-workers/pytorch-minimal/main.py @@ -14,14 +14,14 @@ import argparse import logging -from typing import Any, List, Optional +from typing import List, Optional import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions import torch -def check_gpus(element: Any, gpus_optional: bool = False) -> Any: +def check_gpus(_: None, gpus_optional: bool = False) -> None: """Validates that we are detecting GPUs, otherwise raise a RuntimeError.""" if torch.cuda.is_available(): logging.info(f"Using GPU: {torch.cuda.get_device_name(0)}") @@ -29,7 +29,6 @@ def check_gpus(element: Any, gpus_optional: bool = False) -> Any: logging.warning("No GPUs found, defaulting to CPU.") else: raise RuntimeError("No GPUs found.") - return element def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: diff --git a/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py b/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py new file mode 100644 index 00000000000..74d736256c6 --- /dev/null +++ b/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7", "3.9"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. + "envs": {}, +} diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index 6691c457d9d..c35cf1cca0f 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -111,31 +111,15 @@ ) -def check_gpus(element: Any, gpus_optional: bool) -> Any: - """Makes sure TensorFlow detects GPUs, otherwise raise a RuntimeError. - - Note that this function must be run within a PTransform like beam.Map so - we are sure it's run by the workers, and not the launcher process. - - Args: - element: An element - gpus_optional: If True, the pipeline won't crash if GPUs are not found. - - Returns: - The same element it received as is. - - Raises: - RuntimeError: If no GPUs were found by TensorFlow. - """ - # Make sure we have a GPU available. +def check_gpus(_: None, gpus_optional: bool = False) -> None: + """Validates that we are detecting GPUs, otherwise raise a RuntimeError.""" gpu_devices = tf.config.list_physical_devices("GPU") - logging.info(f"GPU devices: {gpu_devices}") - if len(gpu_devices) == 0: - if gpus_optional: - logging.warning("No GPUs found, defaulting to CPU.") - else: - raise RuntimeError("No GPUs found.") - return element + if gpu_devices: + logging.info(f"Using GPU: {gpu_devices}") + elif gpus_optional: + logging.warning("No GPUs found, defaulting to CPU.") + else: + raise RuntimeError("No GPUs found.") def get_band_paths(scene: str, band_names: List[str]) -> Tuple[str, List[str]]: @@ -274,9 +258,6 @@ def run( gamma = vis_params["gamma"] beam_options = PipelineOptions(beam_args, save_main_session=True) - - # We currently cannot use the `with` statement to run without waiting. 
- # https://issues.apache.org/jira/browse/BEAM-12455 pipeline = beam.Pipeline(options=beam_options) ( pipeline diff --git a/dataflow/gpu-workers/tensorflow-minimal/main.py b/dataflow/gpu-workers/tensorflow-minimal/main.py index f039034349c..6732d95392f 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/main.py +++ b/dataflow/gpu-workers/tensorflow-minimal/main.py @@ -14,14 +14,14 @@ import argparse import logging -from typing import Any, List, Optional +from typing import List, Optional import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions import tensorflow as tf -def check_gpus(element: Any, gpus_optional: bool = False) -> Any: +def check_gpus(_: None, gpus_optional: bool = False) -> None: """Validates that we are detecting GPUs, otherwise raise a RuntimeError.""" gpu_devices = tf.config.list_physical_devices("GPU") if gpu_devices: @@ -30,7 +30,6 @@ def check_gpus(element: Any, gpus_optional: bool = False) -> Any: logging.warning("No GPUs found, defaulting to CPU.") else: raise RuntimeError("No GPUs found.") - return element def run(input_text: str, beam_args: Optional[List[str]] = None) -> None: diff --git a/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py b/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py new file mode 100644 index 00000000000..74d736256c6 --- /dev/null +++ b/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7", "3.9"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
+ "envs": {}, +} From c59bbe66d5d4167a8a00a9b9d9333a5d5dd61886 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 15:58:09 -0700 Subject: [PATCH 30/87] get all jobs and update wait job status --- dataflow/conftest.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index f6a5bb977ca..359184a6df0 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -273,8 +273,9 @@ def dataflow_jobs_get( dataflow.projects() .jobs() .list( + # We don't filter="ACTIVE" because we still want to return the + # job if it failed, is already done, or was cancelled. projectId=project, - filter="ACTIVE", pageSize=list_page_size, ) ) @@ -295,11 +296,7 @@ def dataflow_jobs_wait( job_name: Optional[str] = None, project: str = PROJECT, region: str = REGION, - until_status: Union[str, Iterable[str]] = { - "JOB_STATE_DONE", - "JOB_STATE_FAILED", - "JOB_STATE_CANCELLED", - }, + until_status: str = "JOB_STATE_DONE", timeout_sec: str = 600, poll_interval_sec=30, list_page_size=100, @@ -307,9 +304,14 @@ def dataflow_jobs_wait( """For a list of all the valid states: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState """ - target_status = ( - {until_status} if isinstance(until_status, str) else set(until_status) - ) + + # Wait until we reach the desired status, or the job finished in some way. + target_status = { + until_status, + "JOB_STATE_DONE", + "JOB_STATE_FAILED", + "JOB_STATE_CANCELLED", + } print( f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) From 9ae84e7046ffcdffaa1fb0ccd7d31ef455086e56 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 16:00:11 -0700 Subject: [PATCH 31/87] update header years --- dataflow/gpu-workers/pytorch-minimal/noxfile_config.py | 2 +- dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py | 2 +- dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py b/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py index 74d736256c6..d8e9aba4fdd 100644 --- a/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py +++ b/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py b/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py index 74d736256c6..d8e9aba4fdd 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py +++ b/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py b/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py index 74d736256c6..d8e9aba4fdd 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py +++ b/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py @@ -1,4 +1,4 @@ -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
From 95320731ae9cf640c92f39c3abdc51d06751ded9 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 15 Jun 2021 17:44:06 -0700 Subject: [PATCH 32/87] tune logging --- dataflow/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 359184a6df0..eaa14dc99f7 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -263,7 +263,7 @@ def dataflow_jobs_get( ) ) job = request.execute() - print(f"Dataflow job: {job}") + print(f"Found Dataflow job: {job}") return job elif job_name: @@ -280,10 +280,10 @@ def dataflow_jobs_get( ) ) response = request.execute() - print(f"Finding job {job_name}, response={response}") + print(f"Finding Dataflow job {job_name}") for job in response["jobs"]: if job["name"] == job_name: - print(f"Dataflow job: {job}") + print(f"Found job: {job}") return job return None From 10214b5408587cdd4f35f4edc48e1bd5cc8ba932 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 09:15:59 -0700 Subject: [PATCH 33/87] fix tests --- dataflow/conftest.py | 4 ++-- dataflow/flex-templates/streaming_beam/e2e_test.py | 4 ++-- dataflow/gpu-workers/pytorch-minimal/run.yaml | 1 - dataflow/gpu-workers/tensorflow-landsat/e2e_test.py | 2 +- dataflow/gpu-workers/tensorflow-landsat/main.py | 9 +-------- 5 files changed, 6 insertions(+), 14 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index eaa14dc99f7..281d712cdb6 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -427,8 +427,8 @@ def dataflow_flex_template_run( print(f"Launched Dataflow Flex Template job: {unique_job_name}") except subprocess.CalledProcessError as e: print(e, file=sys.stderr) - stdout = stdout.decode("utf-8") - stderr = stderr.decode("utf-8") + stdout = e.stdout.decode("utf-8") + stderr = e.stderr.decode("utf-8") finally: print("--- stderr ---") print(stderr) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 561f0c9af10..3d592c8eddd 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -86,8 +86,8 @@ def test_flex_template_run( template_path=flex_template_path, bucket_name=bucket_name, parameters={ - "input_subscription": f"projects/{utils.project}/subscriptions/{pubsub_subscription}", - "output_table": f"{utils.project}:{bigquery_dataset}.{bigquery_table}", + "input_subscription": pubsub_subscription, + "output_table": f"{bigquery_dataset}.{bigquery_table}", }, ) diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index 95508c9fb86..7873f59e857 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -44,7 +44,6 @@ steps: - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - - --disk_size_gb=20 - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index 6f9d58a5760..e50e20a88f9 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -50,7 +50,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", "_REGION": utils.region, - 
"_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", + "_OUTPUT_PATH": f"gs://{bucket_name}/outputs", }, source="--no-source", ) diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index c35cf1cca0f..21a31c69d48 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -240,7 +240,6 @@ def run( scenes: List[str], output_path_prefix: str, vis_params: Dict[str, Any], - gpus_optional: bool, beam_args: Optional[List[str]] = None, ) -> None: """Load multiple Landsat scenes and render them as JPEG files. @@ -249,7 +248,6 @@ def run( scenes: List of Landsat 8 scene IDs. output_path_prefix: Path prefix to save the output files. vis_params: Visualization parameters including {rgb_bands, min, max, gamma}. - gpus_optional: If True, the pipeline won't crash if GPUs are not found. beam_args: Optional list of arguments for Beam pipeline options. """ rgb_band_names = vis_params["rgb_band_names"] @@ -325,11 +323,6 @@ def run( parser.add_argument( "--gamma", type=float, default=DEFAULT_GAMMA, help="Gamma correction factor." ) - parser.add_argument( - "--gpus-optional", - action="store_true", - help="If set, the pipeline won't crash if GPUs are not found.", - ) args, beam_args = parser.parse_known_args() scenes = args.scenes or DEFAULT_SCENES @@ -339,4 +332,4 @@ def run( "max": args.max, "gamma": args.gamma, } - run(scenes, args.output_path_prefix, vis_params, args.gpus_optional, beam_args) + run(scenes, args.output_path_prefix, vis_params, beam_args) From 41501a9b41af2537f4d70a8acf8a56b6184f1120 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 10:45:38 -0700 Subject: [PATCH 34/87] made logging more explicit --- dataflow/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 281d712cdb6..22e13492060 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -331,7 +331,7 @@ def dataflow_jobs_wait( logging.exception(e) time.sleep(poll_interval_sec) raise RuntimeError( - f"Dataflow job not found, job_id={job_id}, job_name={job_name}" + f"Dataflow job not found in status {target_status}: job_id={job_id}, job_name={job_name}" ) @staticmethod From 3c6883856c2b983417a06e373d50eee0bbd1af58 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 10:45:55 -0700 Subject: [PATCH 35/87] use disk_size_gb --- dataflow/gpu-workers/pytorch-minimal/run.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index 7873f59e857..2933dcfcf52 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -46,6 +46,7 @@ steps: - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 + - --disk_size_gb=50 options: logging: CLOUD_LOGGING_ONLY From 2081cadc86f9b174c7ab8dde6a38c589350e31ba Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 10:59:15 -0700 Subject: [PATCH 36/87] use larger disk_size_gb for landsat --- dataflow/gpu-workers/tensorflow-landsat/run.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 416a2f9e6e1..9147814d954 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ 
b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -49,6 +49,7 @@ steps: - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 + - --disk_size_gb=50 options: logging: CLOUD_LOGGING_ONLY From 314ffbde966414a1397891909e776983a480df19 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 12:16:54 -0700 Subject: [PATCH 37/87] list all jobs --- dataflow/conftest.py | 51 +++++++++++-------- .../tensorflow-landsat/e2e_test.py | 5 +- 2 files changed, 35 insertions(+), 21 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 22e13492060..abf8544eb97 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -239,12 +239,37 @@ def cloud_build_submit( subprocess.run(cmd, check=True) print(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") + @staticmethod + def dataflow_jobs_list( + project: str = PROJECT, page_size: int = 30 + ) -> Iterable[dict]: + from googleapiclient.discovery import build + + dataflow = build("dataflow", "v1b3") + + response = {"nextPageToken": None} + while "nextPageToken" in response: + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list + request = ( + dataflow.projects() + .jobs() + .list( + projectId=project, + pageToken=response["nextPageToken"], + pageSize=page_size, + ) + ) + response = request.execute() + for job in response["jobs"]: + yield job + @staticmethod def dataflow_jobs_get( job_id: Optional[str] = None, job_name: Optional[str] = None, project: str = PROJECT, - list_page_size=100, + list_page_size=30, ) -> Optional[Dict[str, Any]]: from googleapiclient.discovery import build @@ -262,30 +287,17 @@ def dataflow_jobs_get( view="JOB_VIEW_SUMMARY", ) ) + # If the job is not found, this throws an HttpError exception. job = request.execute() print(f"Found Dataflow job: {job}") return job elif job_name: - # For more info see: - # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list - request = ( - dataflow.projects() - .jobs() - .list( - # We don't filter="ACTIVE" because we still want to return the - # job if it failed, is already done, or was cancelled. - projectId=project, - pageSize=list_page_size, - ) - ) - response = request.execute() - print(f"Finding Dataflow job {job_name}") - for job in response["jobs"]: + for job in Utils.dataflow_jobs_list(project, list_page_size): if job["name"] == job_name: - print(f"Found job: {job}") + print(f"Found Dataflow job: {job}") return job - return None + raise ValueError(f"Dataflow job not found: job_name={job_name}") else: raise ValueError("must specify either `job_id` or `job_name`") @@ -295,9 +307,8 @@ def dataflow_jobs_wait( job_id: Optional[str] = None, job_name: Optional[str] = None, project: str = PROJECT, - region: str = REGION, until_status: str = "JOB_STATE_DONE", - timeout_sec: str = 600, + timeout_sec: str = 600, # defaults to 10 minutes poll_interval_sec=30, list_page_size=100, ) -> Optional[str]: diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index e50e20a88f9..f7a6c681fb9 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -58,7 +58,10 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: def test_tensorflow_landsat(utils: Utils, run_job: str) -> None: # Wait until the job finishes. 
- status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) + timeout = 20 * 60 # 20 minutes + status = utils.dataflow_jobs_wait( + job_name=utils.hyphen_name(NAME), timeout_sec=timeout + ) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" # Check that output files were created and are not empty. From 83de54822c32689732fa6a0ad0f0de802a1e5690 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 12:38:13 -0700 Subject: [PATCH 38/87] wait before querying --- dataflow/flex-templates/streaming_beam/e2e_test.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 3d592c8eddd..503ce6ef65e 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -99,6 +99,10 @@ def test_flex_template_run( time.sleep(60) utils.dataflow_jobs_cancel(job_id) + # After cancelling, wait a minute to make sure the table is created in BigQuery. + # TODO: poll for this with a timeout inside `bigquery_query` + time.sleep(60) + # Check for the output data in BigQuery. query = f"SELECT * FROM {bigquery_dataset.replace(':', '.')}.{bigquery_table}" rows = list(utils.bigquery_query(query)) From c1ee4e9e835dcc2dd5f423d313983ae78b73c401 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 12:53:26 -0700 Subject: [PATCH 39/87] include python version on resource names --- dataflow/conftest.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index abf8544eb97..3c0c533fc08 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -17,6 +17,7 @@ import multiprocessing as mp import os import re +import platform import subprocess import sys import time @@ -35,6 +36,8 @@ HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+") UNDERSCORE_NAME_RE = re.compile(r"[^\w\d_]+") +PYTHON_VERSION = "".join(platform.python_version_tuple()[0:2]) + @dataclass class Utils: @@ -44,11 +47,12 @@ class Utils: @staticmethod def hyphen_name(name: str) -> str: - return f"{HYPHEN_NAME_RE.sub('-', name)}-{UUID}" + unique_name = f"{name}-{PYTHON_VERSION}-{UUID}" + return HYPHEN_NAME_RE.sub("-", unique_name) @staticmethod def underscore_name(name: str) -> str: - return f"{UNDERSCORE_NAME_RE.sub('_', name)}_{UUID}" + return UNDERSCORE_NAME_RE.sub("_", Utils.hyphen_name(name)) @staticmethod def storage_bucket(name: str) -> str: From d0c4533f4a760762afb84b4d90f6871d94d4e191 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 12:54:50 -0700 Subject: [PATCH 40/87] update beam version --- dataflow/flex-templates/streaming_beam/requirements.txt | 2 +- dataflow/gpu-workers/pytorch-minimal/requirements.txt | 2 +- dataflow/gpu-workers/tensorflow-minimal/requirements.txt | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/flex-templates/streaming_beam/requirements.txt b/dataflow/flex-templates/streaming_beam/requirements.txt index 7c934ad8979..009bc29b6d8 100644 --- a/dataflow/flex-templates/streaming_beam/requirements.txt +++ b/dataflow/flex-templates/streaming_beam/requirements.txt @@ -1 +1 @@ -apache-beam[gcp]==2.29.0 +apache-beam[gcp]==2.30.0 diff --git a/dataflow/gpu-workers/pytorch-minimal/requirements.txt b/dataflow/gpu-workers/pytorch-minimal/requirements.txt index 530aa4098e7..ad5777b6ca6 100644 --- a/dataflow/gpu-workers/pytorch-minimal/requirements.txt +++ b/dataflow/gpu-workers/pytorch-minimal/requirements.txt @@ 
-1,2 +1,2 @@
-apache-beam[gcp]==2.29.0
+apache-beam[gcp]==2.30.0
 torch==1.8.1
diff --git a/dataflow/gpu-workers/tensorflow-minimal/requirements.txt b/dataflow/gpu-workers/tensorflow-minimal/requirements.txt
index f2f6e11354a..aa9e7e634f5 100644
--- a/dataflow/gpu-workers/tensorflow-minimal/requirements.txt
+++ b/dataflow/gpu-workers/tensorflow-minimal/requirements.txt
@@ -1,2 +1,2 @@
-apache-beam[gcp]==2.29.0
+apache-beam[gcp]==2.30.0
 tensorflow==2.5.0

From 8a10eb29841a3ba29840b9b4b12a52137a32db30 Mon Sep 17 00:00:00 2001
From: David Cavazos
Date: Wed, 16 Jun 2021 12:57:21 -0700
Subject: [PATCH 41/87] use 30m timeout

---
 dataflow/gpu-workers/tensorflow-landsat/e2e_test.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py
index f7a6c681fb9..d79af9cdae0 100644
--- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py
+++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py
@@ -58,7 +58,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str:

 def test_tensorflow_landsat(utils: Utils, run_job: str) -> None:
     # Wait until the job finishes.
-    timeout = 20 * 60  # 20 minutes
+    timeout = 30 * 60  # 30 minutes
     status = utils.dataflow_jobs_wait(
         job_name=utils.hyphen_name(NAME), timeout_sec=timeout
     )

From 35775b125c7e96c21eff5d7b1dea6fec36175fb9 Mon Sep 17 00:00:00 2001
From: David Cavazos
Date: Wed, 16 Jun 2021 14:12:02 -0700
Subject: [PATCH 42/87] more debugging and increase waiting time

---
 dataflow/conftest.py                                | 10 ++++++++++
 dataflow/flex-templates/streaming_beam/e2e_test.py  |  8 ++------
 dataflow/gpu-workers/tensorflow-landsat/e2e_test.py |  2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/dataflow/conftest.py b/dataflow/conftest.py
index 3c0c533fc08..3b1c0d52bbb 100644
--- a/dataflow/conftest.py
+++ b/dataflow/conftest.py
@@ -64,6 +64,16 @@ def storage_bucket(name: str) -> str:
         print(f"Created storage_bucket: {bucket.name}")
         yield bucket.name

+        # Print all the objects in the bucket before deleting for debugging.
+        print(f"Deleting bucket {bucket.name} with the following contents:")
+        total_files = 0
+        total_size = 0
+        for blob in bucket.list_blobs():
+            print(f"- {blob.name} ({blob.size} bytes)")
+            total_files += 1
+            total_size += blob.size
+        print(f"Total {total_files} files ({total_size} bytes)")
+
         bucket.delete(force=True)
         print(f"Deleted storage_bucket: {bucket.name}")

diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py
index 503ce6ef65e..fc2198bcbfa 100644
--- a/dataflow/flex-templates/streaming_beam/e2e_test.py
+++ b/dataflow/flex-templates/streaming_beam/e2e_test.py
@@ -95,14 +95,10 @@ def test_flex_template_run(
     # First, let's wait until the job is running.
     utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING")

-    # Then, wait a minute for data to arrive, get processed, and cancel it.
-    time.sleep(60)
+    # Then, wait a couple minutes for data to arrive, get processed, and cancel it.
+    time.sleep(2 * 60)
     utils.dataflow_jobs_cancel(job_id)

-    # After cancelling, wait a minute to make sure the table is created in BigQuery.
-    # TODO: poll for this with a timeout inside `bigquery_query`
-    time.sleep(60)
-
     # Check for the output data in BigQuery.
query = f"SELECT * FROM {bigquery_dataset.replace(':', '.')}.{bigquery_table}" rows = list(utils.bigquery_query(query)) diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index d79af9cdae0..50f01202b1b 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -50,7 +50,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: "_IMAGE": f"{NAME}:{utils.uuid}", "_TEMP_LOCATION": f"gs://{bucket_name}/temp", "_REGION": utils.region, - "_OUTPUT_PATH": f"gs://{bucket_name}/outputs", + "_OUTPUT_PATH": f"gs://{bucket_name}/outputs/", }, source="--no-source", ) From 9484ce9c8df808c66fec1fd1988867b9ed45f7bf Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 14:35:04 -0700 Subject: [PATCH 43/87] update bq query --- dataflow/conftest.py | 1 + dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 3b1c0d52bbb..1de640c0bbf 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -100,6 +100,7 @@ def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: from google.cloud import bigquery bigquery_client = bigquery.Client() + print(f"Bigquery query: {query}") for row in bigquery_client.query(query): yield dict(row) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index fc2198bcbfa..c2c83ad02ea 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -100,7 +100,7 @@ def test_flex_template_run( utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. 
- query = f"SELECT * FROM {bigquery_dataset.replace(':', '.')}.{bigquery_table}" + query = f"SELECT * FROM `{bigquery_dataset.replace(':', '.')}.{bigquery_table}`" rows = list(utils.bigquery_query(query)) assert len(rows) > 0 for row in rows: From d94d6880e0be35cc884af83a4a7c258aaac08c6b Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 14:39:51 -0700 Subject: [PATCH 44/87] use shorter flag name alias --- dataflow/gpu-workers/pytorch-minimal/run.yaml | 2 +- dataflow/gpu-workers/tensorflow-landsat/run.yaml | 2 +- dataflow/gpu-workers/tensorflow-minimal/run.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-workers/pytorch-minimal/run.yaml index 2933dcfcf52..fe285533c54 100644 --- a/dataflow/gpu-workers/pytorch-minimal/run.yaml +++ b/dataflow/gpu-workers/pytorch-minimal/run.yaml @@ -43,7 +43,7 @@ steps: - --region=$_REGION - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE + - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 - --disk_size_gb=50 diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 9147814d954..2b97dd28e48 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -46,7 +46,7 @@ steps: - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - --worker_machine_type=custom-1-13312-ext - - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE + - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 - --disk_size_gb=50 diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index 337634629e8..f5d2a77c0d8 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -43,7 +43,7 @@ steps: - --region=$_REGION - --job_name=$_JOB_NAME - --temp_location=$_TEMP_LOCATION - - --worker_harness_container_image=gcr.io/$PROJECT_ID/$_IMAGE + - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 From 1887870ad63a715b9372d86550e2f2d46212003f Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 15:13:14 -0700 Subject: [PATCH 45/87] made logs clearer --- dataflow/conftest.py | 72 ++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 36 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 1de640c0bbf..14fe00fbe82 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -47,7 +47,7 @@ class Utils: @staticmethod def hyphen_name(name: str) -> str: - unique_name = f"{name}-{PYTHON_VERSION}-{UUID}" + unique_name = f"{name}-py{PYTHON_VERSION}-{UUID}" return HYPHEN_NAME_RE.sub("-", unique_name) @staticmethod @@ -61,21 +61,21 @@ def storage_bucket(name: str) -> str: storage_client = storage.Client() bucket = storage_client.create_bucket(Utils.hyphen_name(name)) - print(f"Created storage_bucket: {bucket.name}") + print(f">> Created storage_bucket: {bucket.name}") yield bucket.name # Print all the objects in the bucket before deleting for debugging. 
- print(f"Deleting bucket {bucket.name} with the following contents:") + print(f">> Deleting bucket {bucket.name} with the following contents:") total_files = 0 total_size = 0 for blob in bucket.list_blobs(): - print(f"- {blob.name} ({blob.size} bytes)") + print(f" - {blob.name} ({blob.size} bytes)") total_files += 1 total_size += blob.size - print(f"Total {total_files} files ({total_size} bytes)") + print(f">> Total {total_files} files ({total_size} bytes)") bucket.delete(force=True) - print(f"Deleted storage_bucket: {bucket.name}") + print(f">> Deleted storage_bucket: {bucket.name}") @staticmethod def bigquery_dataset(name: str, project: str = PROJECT) -> str: @@ -87,20 +87,20 @@ def bigquery_dataset(name: str, project: str = PROJECT) -> str: bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") ) - print(f"Created bigquery_dataset: {dataset.full_dataset_id}") + print(f">> Created bigquery_dataset: {dataset.full_dataset_id}") yield dataset.full_dataset_id bigquery_client.delete_dataset( dataset.full_dataset_id.replace(":", "."), delete_contents=True ) - print(f"Deleted bigquery_dataset: {dataset.full_dataset_id}") + print(f">> Deleted bigquery_dataset: {dataset.full_dataset_id}") @staticmethod def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: from google.cloud import bigquery bigquery_client = bigquery.Client() - print(f"Bigquery query: {query}") + print(f">> Bigquery query: {query}") for row in bigquery_client.query(query): yield dict(row) @@ -112,7 +112,7 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) topic = publisher_client.create_topic(topic_path) - print(f"Created pubsub_topic: {topic.name}") + print(f">> Created pubsub_topic: {topic.name}") yield topic.name # Due to the pinned library dependencies in apache-beam, client @@ -120,9 +120,9 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: # We use gcloud for a workaround. See also: # https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4492 cmd = ["gcloud", "pubsub", "--project", project, "topics", "delete", topic.name] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Deleted pubsub_topic: {topic.name}") + print(f">> Deleted pubsub_topic: {topic.name}") @staticmethod def pubsub_subscription( @@ -138,7 +138,7 @@ def pubsub_subscription( ) subscription = subscriber.create_subscription(subscription_path, topic_path) - print(f"Created pubsub_subscription: {subscription.name}") + print(f">> Created pubsub_subscription: {subscription.name}") yield subscription.name # Due to the pinned library dependencies in apache-beam, client @@ -154,9 +154,9 @@ def pubsub_subscription( "delete", subscription.name, ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Deleted pubsub_subscription: {subscription.name}") + print(f">> Deleted pubsub_subscription: {subscription.name}") @staticmethod def pubsub_publisher( @@ -176,14 +176,14 @@ def _infinite_publish_job() -> None: time.sleep(sleep_sec) # Start a subprocess in the background to do the publishing. - print(f"Starting publisher on {topic_path}") + print(f">> Starting publisher on {topic_path}") p = mp.Process(target=_infinite_publish_job) p.start() yield p.is_alive() # For cleanup, terminate the background process. 
- print("Stopping publisher") + print(">> Stopping publisher") p.join(timeout=0) p.terminate() @@ -197,7 +197,7 @@ def cloud_build_submit( ) -> None: """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" cmd = ["gcloud", "auth", "configure-docker"] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) if substitutions: @@ -218,9 +218,9 @@ def cloud_build_submit( *cmd_substitutions, source, ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Cloud build finished successfully: {config}") + print(f">> Cloud build finished successfully: {config}") yield f.read() elif image_name: cmd = [ @@ -232,9 +232,9 @@ def cloud_build_submit( *cmd_substitutions, source, ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Created image: gcr.io/{project}/{image_name}:{UUID}") + print(f">> Created image: gcr.io/{project}/{image_name}:{UUID}") yield f"{image_name}:{UUID}" else: raise ValueError("must specify either `config` or `image_name`") @@ -250,9 +250,9 @@ def cloud_build_submit( "--force-delete-tags", "--quiet", ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") + print(f">> Deleted image: gcr.io/{project}/{image_name}:{UUID}") @staticmethod def dataflow_jobs_list( @@ -304,13 +304,13 @@ def dataflow_jobs_get( ) # If the job is not found, this throws an HttpError exception. job = request.execute() - print(f"Found Dataflow job: {job}") + print(f">> Found Dataflow job: {job}") return job elif job_name: for job in Utils.dataflow_jobs_list(project, list_page_size): if job["name"] == job_name: - print(f"Found Dataflow job: {job}") + print(f">> Found Dataflow job: {job}") return job raise ValueError(f"Dataflow job not found: job_name={job_name}") @@ -339,7 +339,7 @@ def dataflow_jobs_wait( "JOB_STATE_CANCELLED", } print( - f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" + f">> Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) status = None for _ in range(0, timeout_sec, poll_interval_sec): @@ -364,7 +364,7 @@ def dataflow_jobs_wait( def dataflow_jobs_cancel( job_id: str, project: str = PROJECT, region: str = REGION ) -> None: - print(f"Canceling Dataflow job ID: {job_id}") + print(f">> Canceling Dataflow job ID: {job_id}") # We get an error using the googleapiclient.discovery APIs, probably # due to incompatible dependencies with apache-beam. # We use gcloud instead to cancel the job. @@ -378,9 +378,9 @@ def dataflow_jobs_cancel( job_id, f"--region={region}", ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"Cancelled Dataflow job: {job_id}") + print(f">> Cancelled Dataflow job: {job_id}") @staticmethod def dataflow_flex_template_build( @@ -403,10 +403,10 @@ def dataflow_flex_template_build( "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] - print(cmd) + print(f">> {cmd}") subprocess.run(cmd, check=True) - print(f"dataflow_flex_template_build: {template_gcs_path}") + print(f">> dataflow_flex_template_build: {template_gcs_path}") yield template_gcs_path # The template file gets deleted when we delete the bucket. 
@@ -423,7 +423,7 @@ def dataflow_flex_template_run( # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run unique_job_name = Utils.hyphen_name(job_name) - print(f"dataflow_job_name: {unique_job_name}") + print(f">> dataflow_job_name: {unique_job_name}") cmd = [ "gcloud", "dataflow", @@ -440,7 +440,7 @@ def dataflow_flex_template_run( "temp_location": f"gs://{bucket_name}/temp", }.items() ] - print(cmd) + print(f">> {cmd}") try: # The `capture_output` option was added in Python 3.7, so we must # pass the `stdout` and `stderr` options explicitly to support 3.6. @@ -450,7 +450,7 @@ def dataflow_flex_template_run( ) stdout = p.stdout.decode("utf-8") stderr = p.stderr.decode("utf-8") - print(f"Launched Dataflow Flex Template job: {unique_job_name}") + print(f">> Launched Dataflow Flex Template job: {unique_job_name}") except subprocess.CalledProcessError as e: print(e, file=sys.stderr) stdout = e.stdout.decode("utf-8") @@ -466,6 +466,6 @@ def dataflow_flex_template_run( @pytest.fixture(scope="session") def utils() -> Utils: - print(f"Test unique identifier: {UUID}") + print(f">> Test unique identifier: {UUID}") subprocess.run(["gcloud", "version"]) return Utils() From afdfa0f35b7a172aa8317d8f8ab5e3e430482004 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 15:17:14 -0700 Subject: [PATCH 46/87] fix typo --- dataflow/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 14fe00fbe82..64cdf8c83b8 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -364,7 +364,7 @@ def dataflow_jobs_wait( def dataflow_jobs_cancel( job_id: str, project: str = PROJECT, region: str = REGION ) -> None: - print(f">> Canceling Dataflow job ID: {job_id}") + print(f">> Cancelling Dataflow job ID: {job_id}") # We get an error using the googleapiclient.discovery APIs, probably # due to incompatible dependencies with apache-beam. # We use gcloud instead to cancel the job. From 8d50016606f7a47a21dc05c25b8b85b92362e337 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 15:19:33 -0700 Subject: [PATCH 47/87] add more logging --- dataflow/flex-templates/streaming_beam/e2e_test.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index c2c83ad02ea..4b879cc3289 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -95,8 +95,9 @@ def test_flex_template_run( # First, lets wait until the job is running. utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") - # Then, wait a couple minutes for data to arrive, get processed, and cancel it. - time.sleep(2 * 60) + # Then, for a while for data to arrive, get processed, and cancel it. + print(f">> Pipeline is running, waiting for messages to arrive") + time.sleep(60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. 
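The patch above, and several that follow (48, 54, 55, 58), keep retuning a fixed `time.sleep` before the BigQuery check. A hedged alternative, not part of this series: poll until output rows actually appear, reusing the `Utils.bigquery_query` helper from `dataflow/conftest.py`. The `wait_for_rows` name and its defaults are hypothetical.

```py
import time
from typing import Any, Dict, List


def wait_for_rows(utils, query: str, timeout_sec: int = 300, poll_sec: int = 30) -> List[Dict[str, Any]]:
    """Polls BigQuery until `query` returns rows, instead of sleeping a fixed time."""
    for _ in range(0, timeout_sec, poll_sec):
        try:
            rows = list(utils.bigquery_query(query))
        except Exception:
            rows = []  # the streaming pipeline may not have created the output table yet
        if rows:
            return rows
        time.sleep(poll_sec)
    raise TimeoutError(f"No rows after {timeout_sec}s for query: {query}")
```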
From 5880ce94d037609de365d4e9a3a37a61cdd13017 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 15:26:31 -0700 Subject: [PATCH 48/87] wait longer for table to be created --- dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 4b879cc3289..d48fe67a9d1 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -97,7 +97,7 @@ def test_flex_template_run( # Then, for a while for data to arrive, get processed, and cancel it. print(f">> Pipeline is running, waiting for messages to arrive") - time.sleep(60) + time.sleep(5 * 60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. From 68d3ad53c0938f0bd705bc9d21382e76950803c0 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 15:35:27 -0700 Subject: [PATCH 49/87] remove spurious f-string --- dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index d48fe67a9d1..db93c832e16 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -96,7 +96,7 @@ def test_flex_template_run( utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") # Then, for a while for data to arrive, get processed, and cancel it. - print(f">> Pipeline is running, waiting for messages to arrive") + print(">> Pipeline is running, waiting for messages to arrive") time.sleep(5 * 60) utils.dataflow_jobs_cancel(job_id) From 958a56a38fe6dc6f14a2684a6d4a0a96ac6c15da Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 16 Jun 2021 16:26:01 -0700 Subject: [PATCH 50/87] add disk size --- dataflow/gpu-workers/tensorflow-minimal/run.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index f5d2a77c0d8..7cb9d389134 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -46,6 +46,7 @@ steps: - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 + - --disk_size_gb=50 options: logging: CLOUD_LOGGING_ONLY From 1f47a9243611e1d81d6f441b9a34001d5a7bcd2b Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 10:20:08 -0700 Subject: [PATCH 51/87] updated beam version --- dataflow/gpu-workers/tensorflow-landsat/main.py | 3 +-- dataflow/gpu-workers/tensorflow-landsat/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index 21a31c69d48..6afa31522c3 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -52,11 +52,10 @@ import logging import os import re -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Optional, Tuple import apache_beam as beam from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.typehints.typehints import Optional import numpy as np from PIL import Image import rasterio diff --git 
a/dataflow/gpu-workers/tensorflow-landsat/requirements.txt b/dataflow/gpu-workers/tensorflow-landsat/requirements.txt index 1823ef09b96..cbfaaf47f75 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/requirements.txt +++ b/dataflow/gpu-workers/tensorflow-landsat/requirements.txt @@ -1,4 +1,4 @@ Pillow==8.2.0 -apache-beam[gcp]==2.29.0 +apache-beam[gcp]==2.30.0 rasterio==1.2.4 tensorflow==2.5.0 From 84759a4d87ca95eb39fba08e558592adc951a445 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 10:21:42 -0700 Subject: [PATCH 52/87] add comments --- dataflow/gpu-workers/pytorch-minimal/Dockerfile | 2 ++ dataflow/gpu-workers/tensorflow-landsat/Dockerfile | 2 ++ dataflow/gpu-workers/tensorflow-minimal/Dockerfile | 2 ++ 3 files changed, 6 insertions(+) diff --git a/dataflow/gpu-workers/pytorch-minimal/Dockerfile b/dataflow/gpu-workers/pytorch-minimal/Dockerfile index 4bee40c9d13..0dcc04d017d 100644 --- a/dataflow/gpu-workers/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-workers/pytorch-minimal/Dockerfile @@ -22,6 +22,8 @@ COPY requirements.txt . COPY *.py ./ # Install the pipeline requirements and check that there are no conflicts. +# Since the image already has all the dependencies installed, +# there's no need to run with the --requirements_file option. RUN pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check diff --git a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile index a8686076460..cc2d7eba729 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile +++ b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile @@ -33,6 +33,8 @@ RUN apt-get update \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ # Install the pipeline requirements and check that there are no conflicts. + # Since the image already has all the dependencies installed, + # there's no need to run with the --requirements_file option. && pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check diff --git a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile index e892d4c28a8..ff88332cd41 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile +++ b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile @@ -33,6 +33,8 @@ RUN apt-get update \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ # Install the pipeline requirements and check that there are no conflicts. + # Since the image already has all the dependencies installed, + # there's no need to run with the --requirements_file option. 
&& pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check From d11968803b6f3b30451ed276da40fee0fc091795 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 11:26:00 -0700 Subject: [PATCH 53/87] add missing fixture --- dataflow/gpu-workers/tensorflow-landsat/e2e_test.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index 50f01202b1b..3576e3b781f 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -56,7 +56,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: ) -def test_tensorflow_landsat(utils: Utils, run_job: str) -> None: +def test_tensorflow_landsat(utils: Utils, bucket_name: str, run_job: str) -> None: # Wait until the job finishes. timeout = 30 * 60 # 30 minutes status = utils.dataflow_jobs_wait( @@ -66,6 +66,7 @@ def test_tensorflow_landsat(utils: Utils, run_job: str) -> None: # Check that output files were created and are not empty. storage_client = storage.Client() + print(f">> Checking for output files in: gs://{bucket_name}/outputs/") output_files = list(storage_client.list_blobs(bucket_name, prefix="outputs/")) assert len(output_files) > 0, f"No files found in gs://{bucket_name}/outputs/" for output_file in output_files: From fe197aef9c046f17fc5c060432425ffd9d27a15d Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 11:27:47 -0700 Subject: [PATCH 54/87] decrease waiting time --- dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index db93c832e16..53228d7effe 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -97,7 +97,7 @@ def test_flex_template_run( # Then, for a while for data to arrive, get processed, and cancel it. print(">> Pipeline is running, waiting for messages to arrive") - time.sleep(5 * 60) + time.sleep(60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. From 303e0091918d79dea25827b29d3a0d274c2f7228 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 12:22:17 -0700 Subject: [PATCH 55/87] increase waiting time --- dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 53228d7effe..db93c832e16 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -97,7 +97,7 @@ def test_flex_template_run( # Then, for a while for data to arrive, get processed, and cancel it. print(">> Pipeline is running, waiting for messages to arrive") - time.sleep(60) + time.sleep(5 * 60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. 
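The next patch adds an explicit `--staging-location` to the Flex Template launch command that `dataflow_flex_template_run` in `dataflow/conftest.py` assembles. For context, a sketch of the resulting invocation; the project, region, bucket, and job names below are placeholders, not values from a real run:

```py
import subprocess

project = "my-project"      # placeholder
bucket_name = "my-bucket"   # placeholder
template_path = f"gs://{bucket_name}/template.json"

cmd = [
    "gcloud", "dataflow", "flex-template", "run", "my-job-name",
    f"--template-file-gcs-location={template_path}",
    f"--project={project}",
    "--region=us-central1",
    # Added by the next patch: where the launcher stages its files.
    f"--staging-location=gs://{bucket_name}/staging",
    # temp_location is forwarded to the pipeline as a template parameter.
    f"--parameters=temp_location=gs://{bucket_name}/temp",
]
subprocess.run(cmd, check=True)
```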
From 748345ff258a5df79d257d97b814228859eaaf7d Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 17 Jun 2021 12:32:26 -0700 Subject: [PATCH 56/87] provide staging location --- dataflow/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 64cdf8c83b8..8a0aa35ac70 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -433,6 +433,7 @@ def dataflow_flex_template_run( f"--template-file-gcs-location={template_path}", f"--project={project}", f"--region={region}", + f"--staging-location=gs://{bucket_name}/staging", ] + [ f"--parameters={name}={value}" for name, value in { From 226d642ebbfa5a6d38709531f147960669b6c178 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 18 Jun 2021 13:12:09 -0700 Subject: [PATCH 57/87] drain before cancel --- dataflow/conftest.py | 110 ++++++++++-------- .../flex-templates/streaming_beam/e2e_test.py | 5 +- 2 files changed, 67 insertions(+), 48 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 8a0aa35ac70..ea31511615e 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -61,21 +61,21 @@ def storage_bucket(name: str) -> str: storage_client = storage.Client() bucket = storage_client.create_bucket(Utils.hyphen_name(name)) - print(f">> Created storage_bucket: {bucket.name}") + logging.info(f"Created storage_bucket: {bucket.name}") yield bucket.name # Print all the objects in the bucket before deleting for debugging. - print(f">> Deleting bucket {bucket.name} with the following contents:") + logging.info(f"Deleting bucket {bucket.name} with the following contents:") total_files = 0 total_size = 0 for blob in bucket.list_blobs(): - print(f" - {blob.name} ({blob.size} bytes)") + logging.info(f" - {blob.name} ({blob.size} bytes)") total_files += 1 total_size += blob.size - print(f">> Total {total_files} files ({total_size} bytes)") + logging.info(f"Total {total_files} files ({total_size} bytes)") bucket.delete(force=True) - print(f">> Deleted storage_bucket: {bucket.name}") + logging.info(f"Deleted storage_bucket: {bucket.name}") @staticmethod def bigquery_dataset(name: str, project: str = PROJECT) -> str: @@ -87,20 +87,20 @@ def bigquery_dataset(name: str, project: str = PROJECT) -> str: bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") ) - print(f">> Created bigquery_dataset: {dataset.full_dataset_id}") + logging.info(f"Created bigquery_dataset: {dataset.full_dataset_id}") yield dataset.full_dataset_id bigquery_client.delete_dataset( dataset.full_dataset_id.replace(":", "."), delete_contents=True ) - print(f">> Deleted bigquery_dataset: {dataset.full_dataset_id}") + logging.info(f"Deleted bigquery_dataset: {dataset.full_dataset_id}") @staticmethod def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: from google.cloud import bigquery bigquery_client = bigquery.Client() - print(f">> Bigquery query: {query}") + logging.info(f"Bigquery query: {query}") for row in bigquery_client.query(query): yield dict(row) @@ -112,7 +112,7 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) topic = publisher_client.create_topic(topic_path) - print(f">> Created pubsub_topic: {topic.name}") + logging.info(f"Created pubsub_topic: {topic.name}") yield topic.name # Due to the pinned library dependencies in apache-beam, client @@ -120,9 +120,9 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: # We use gcloud for a workaround. 
See also: # https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4492 cmd = ["gcloud", "pubsub", "--project", project, "topics", "delete", topic.name] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Deleted pubsub_topic: {topic.name}") + logging.info(f"Deleted pubsub_topic: {topic.name}") @staticmethod def pubsub_subscription( @@ -138,7 +138,7 @@ def pubsub_subscription( ) subscription = subscriber.create_subscription(subscription_path, topic_path) - print(f">> Created pubsub_subscription: {subscription.name}") + logging.info(f"Created pubsub_subscription: {subscription.name}") yield subscription.name # Due to the pinned library dependencies in apache-beam, client @@ -154,9 +154,9 @@ def pubsub_subscription( "delete", subscription.name, ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Deleted pubsub_subscription: {subscription.name}") + logging.info(f"Deleted pubsub_subscription: {subscription.name}") @staticmethod def pubsub_publisher( @@ -176,14 +176,14 @@ def _infinite_publish_job() -> None: time.sleep(sleep_sec) # Start a subprocess in the background to do the publishing. - print(f">> Starting publisher on {topic_path}") + logging.info(f"Starting publisher on {topic_path}") p = mp.Process(target=_infinite_publish_job) p.start() yield p.is_alive() # For cleanup, terminate the background process. - print(">> Stopping publisher") + logging.info("Stopping publisher") p.join(timeout=0) p.terminate() @@ -197,7 +197,7 @@ def cloud_build_submit( ) -> None: """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" cmd = ["gcloud", "auth", "configure-docker"] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) if substitutions: @@ -218,9 +218,9 @@ def cloud_build_submit( *cmd_substitutions, source, ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Cloud build finished successfully: {config}") + logging.info(f"Cloud build finished successfully: {config}") yield f.read() elif image_name: cmd = [ @@ -232,9 +232,9 @@ def cloud_build_submit( *cmd_substitutions, source, ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Created image: gcr.io/{project}/{image_name}:{UUID}") + logging.info(f"Created image: gcr.io/{project}/{image_name}:{UUID}") yield f"{image_name}:{UUID}" else: raise ValueError("must specify either `config` or `image_name`") @@ -250,9 +250,9 @@ def cloud_build_submit( "--force-delete-tags", "--quiet", ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Deleted image: gcr.io/{project}/{image_name}:{UUID}") + logging.info(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") @staticmethod def dataflow_jobs_list( @@ -304,13 +304,13 @@ def dataflow_jobs_get( ) # If the job is not found, this throws an HttpError exception. 
job = request.execute() - print(f">> Found Dataflow job: {job}") + logging.info(f"Found Dataflow job: {job}") return job elif job_name: for job in Utils.dataflow_jobs_list(project, list_page_size): if job["name"] == job_name: - print(f">> Found Dataflow job: {job}") + logging.info(f"Found Dataflow job: {job}") return job raise ValueError(f"Dataflow job not found: job_name={job_name}") @@ -324,7 +324,7 @@ def dataflow_jobs_wait( project: str = PROJECT, until_status: str = "JOB_STATE_DONE", timeout_sec: str = 600, # defaults to 10 minutes - poll_interval_sec=30, + poll_interval_sec=60, list_page_size=100, ) -> Optional[str]: """For a list of all the valid states: @@ -338,10 +338,9 @@ def dataflow_jobs_wait( "JOB_STATE_FAILED", "JOB_STATE_CANCELLED", } - print( - f">> Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" + logging.info( + f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) - status = None for _ in range(0, timeout_sec, poll_interval_sec): try: job = Utils.dataflow_jobs_get( @@ -352,7 +351,13 @@ def dataflow_jobs_wait( ) status = job["currentState"] if status in target_status: + logging.info( + f"Job status {status} in {target_status}, done waiting" + ) return status + logging.info( + f"Job status {status} not in {target_status}, retrying in {poll_interval_sec} seconds" + ) except Exception as e: logging.exception(e) time.sleep(poll_interval_sec) @@ -364,11 +369,24 @@ def dataflow_jobs_wait( def dataflow_jobs_cancel( job_id: str, project: str = PROJECT, region: str = REGION ) -> None: - print(f">> Cancelling Dataflow job ID: {job_id}") + logging.info(f"Cancelling Dataflow job ID: {job_id}") # We get an error using the googleapiclient.discovery APIs, probably # due to incompatible dependencies with apache-beam. # We use gcloud instead to cancel the job. - # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/cancel + # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/drain + cmd = [ + "gcloud", + f"--project={project}", + "dataflow", + "jobs", + "drain", + job_id, + f"--region={region}", + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + + # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/cancel cmd = [ "gcloud", f"--project={project}", @@ -378,9 +396,9 @@ def dataflow_jobs_cancel( job_id, f"--region={region}", ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> Cancelled Dataflow job: {job_id}") + logging.info(f"Cancelled Dataflow job: {job_id}") @staticmethod def dataflow_flex_template_build( @@ -403,10 +421,10 @@ def dataflow_flex_template_build( "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] - print(f">> {cmd}") + logging.info(f"{cmd}") subprocess.run(cmd, check=True) - print(f">> dataflow_flex_template_build: {template_gcs_path}") + logging.info(f"dataflow_flex_template_build: {template_gcs_path}") yield template_gcs_path # The template file gets deleted when we delete the bucket. 
@@ -423,7 +441,7 @@ def dataflow_flex_template_run( # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run unique_job_name = Utils.hyphen_name(job_name) - print(f">> dataflow_job_name: {unique_job_name}") + logging.info(f"dataflow_job_name: {unique_job_name}") cmd = [ "gcloud", "dataflow", @@ -438,10 +456,10 @@ def dataflow_flex_template_run( f"--parameters={name}={value}" for name, value in { **parameters, - "temp_location": f"gs://{bucket_name}/temp", }.items() ] - print(f">> {cmd}") + logging.info(f"{cmd}") + try: # The `capture_output` option was added in Python 3.7, so we must # pass the `stdout` and `stderr` options explicitly to support 3.6. @@ -451,22 +469,22 @@ def dataflow_flex_template_run( ) stdout = p.stdout.decode("utf-8") stderr = p.stderr.decode("utf-8") - print(f">> Launched Dataflow Flex Template job: {unique_job_name}") + logging.info(f"Launched Dataflow Flex Template job: {unique_job_name}") except subprocess.CalledProcessError as e: - print(e, file=sys.stderr) + logging.info(e, file=sys.stderr) stdout = e.stdout.decode("utf-8") stderr = e.stderr.decode("utf-8") finally: - print("--- stderr ---") - print(stderr) - print("--- stdout ---") - print(stdout) - print("--- end ---") + logging.info("--- stderr ---") + logging.info(stderr) + logging.info("--- stdout ---") + logging.info(stdout) + logging.info("--- end ---") return yaml.safe_load(stdout)["job"]["id"] @pytest.fixture(scope="session") def utils() -> Utils: - print(f">> Test unique identifier: {UUID}") + logging.info(f"Test unique identifier: {UUID}") subprocess.run(["gcloud", "version"]) return Utils() diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index db93c832e16..d2828d731a1 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -11,6 +11,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import json +import logging import time try: @@ -96,8 +97,8 @@ def test_flex_template_run( utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") # Then, for a while for data to arrive, get processed, and cancel it. - print(">> Pipeline is running, waiting for messages to arrive") - time.sleep(5 * 60) + logging.info("Pipeline is running, waiting for messages to arrive") + time.sleep(60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. From 16e5d8e917bd6439ac74c8022de3f4fb73ff2c68 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 18 Jun 2021 13:39:36 -0700 Subject: [PATCH 58/87] adjust wait time --- dataflow/flex-templates/streaming_beam/e2e_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index d2828d731a1..4fb3c484491 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -98,7 +98,7 @@ def test_flex_template_run( # Then, for a while for data to arrive, get processed, and cancel it. logging.info("Pipeline is running, waiting for messages to arrive") - time.sleep(60) + time.sleep(5 * 60) utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. 
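Patch 57 above drains the job before cancelling it, so in-flight elements finish processing instead of being dropped. Draining applies only to streaming pipelines, so a drain request is expected to fail for batch jobs; a defensive variant (a sketch, not the implementation used by this series) tolerates that and falls through to cancel:

```py
import subprocess


def drain_then_cancel(job_id: str, project: str, region: str) -> None:
    """Requests a drain, then cancels; the drain step may fail for batch jobs."""
    base = ["gcloud", f"--project={project}", "dataflow", "jobs"]
    # Ask the job to drain first (no check=True: batch jobs reject drain requests).
    drain = subprocess.run(base + ["drain", job_id, f"--region={region}"])
    if drain.returncode != 0:
        print(f"Drain failed or unsupported for job {job_id}; cancelling directly")
    subprocess.run(base + ["cancel", job_id, f"--region={region}"], check=True)
```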
From aa29c54962d200f7035d8f09bde744db7b7bfc88 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 18 Jun 2021 14:40:42 -0700 Subject: [PATCH 59/87] adjust timeout --- dataflow/conftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index ea31511615e..b397bb848ed 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -323,7 +323,7 @@ def dataflow_jobs_wait( job_name: Optional[str] = None, project: str = PROJECT, until_status: str = "JOB_STATE_DONE", - timeout_sec: str = 600, # defaults to 10 minutes + timeout_sec: str = 20 * 60, # defaults to 20 minutes poll_interval_sec=60, list_page_size=100, ) -> Optional[str]: @@ -341,7 +341,7 @@ def dataflow_jobs_wait( logging.info( f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) - for _ in range(0, timeout_sec, poll_interval_sec): + for _ in range(0, timeout_sec + 1, poll_interval_sec): try: job = Utils.dataflow_jobs_get( job_id=job_id, @@ -485,6 +485,7 @@ def dataflow_flex_template_run( @pytest.fixture(scope="session") def utils() -> Utils: + logging.getLogger().setLevel(logging.info) logging.info(f"Test unique identifier: {UUID}") subprocess.run(["gcloud", "version"]) return Utils() From 57eb9ab49ad2147dc161a25db0977f9ecb40eefe Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 18 Jun 2021 14:57:00 -0700 Subject: [PATCH 60/87] adjust timeout --- dataflow/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index b397bb848ed..4e27e8df4f5 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -485,7 +485,7 @@ def dataflow_flex_template_run( @pytest.fixture(scope="session") def utils() -> Utils: - logging.getLogger().setLevel(logging.info) + logging.getLogger().setLevel(logging.INFO) logging.info(f"Test unique identifier: {UUID}") subprocess.run(["gcloud", "version"]) return Utils() From 3104e6370f70d95a27d7312c87f6f6dad8a314bc Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 18 Jun 2021 15:43:44 -0700 Subject: [PATCH 61/87] improve error messages --- dataflow/conftest.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 4e27e8df4f5..b69d3dabf35 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -341,6 +341,7 @@ def dataflow_jobs_wait( logging.info( f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) + status = None for _ in range(0, timeout_sec + 1, poll_interval_sec): try: job = Utils.dataflow_jobs_get( @@ -361,9 +362,14 @@ def dataflow_jobs_wait( except Exception as e: logging.exception(e) time.sleep(poll_interval_sec) - raise RuntimeError( - f"Dataflow job not found in status {target_status}: job_id={job_id}, job_name={job_name}" - ) + if status is None: + raise RuntimeError( + f"Dataflow job not found: timeout_sec={timeout_sec}, target_status={target_status}, job_id={job_id}, job_name={job_name}" + ) + else: + raise RuntimeError( + f"Dataflow job finished in status {status} but expected {target_status}: job_id={job_id}, job_name={job_name}" + ) @staticmethod def dataflow_jobs_cancel( From 60372bb6384588dabf185955a8f50bd793e5f368 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 21 Jun 2021 11:29:31 -0700 Subject: [PATCH 62/87] small refactorings --- dataflow/conftest.py | 8 ++++++- .../flex-templates/streaming_beam/e2e_test.py | 19 +++++++++------ .../gpu-workers/pytorch-minimal/README.md | 18 
+++++++-------- .../gpu-workers/pytorch-minimal/e2e_test.py | 4 ++-- .../gpu-workers/tensorflow-landsat/README.md | 23 ++++++++----------- .../tensorflow-landsat/e2e_test.py | 6 +++-- .../gpu-workers/tensorflow-minimal/README.md | 18 +++++++-------- .../tensorflow-minimal/e2e_test.py | 4 ++-- 8 files changed, 52 insertions(+), 48 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index b69d3dabf35..6dec741f172 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -322,8 +322,9 @@ def dataflow_jobs_wait( job_id: Optional[str] = None, job_name: Optional[str] = None, project: str = PROJECT, + region: str = REGION, until_status: str = "JOB_STATE_DONE", - timeout_sec: str = 20 * 60, # defaults to 20 minutes + timeout_sec: str = 30 * 60, poll_interval_sec=60, list_page_size=100, ) -> Optional[str]: @@ -356,6 +357,11 @@ def dataflow_jobs_wait( f"Job status {status} in {target_status}, done waiting" ) return status + elif status == "JOB_STATE_FAILED": + raise RuntimeError( + "Dataflow job failed:\n" + f"https://console.cloud.google.com/dataflow/jobs/{region}/{job_id}?project={project}" + ) logging.info( f"Job status {status} not in {target_status}, retrying in {poll_interval_sec} seconds" ) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 4fb3c484491..6faec89b8e5 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -72,23 +72,23 @@ def flex_template_path(utils: Utils, bucket_name: str, flex_template_image: str) ) -def test_flex_template_run( +@pytest.fixture(scope="session") +def run_dataflow_job( utils: Utils, bucket_name: str, pubsub_publisher: str, pubsub_subscription: str, flex_template_path: str, bigquery_dataset: str, -) -> None: +) -> str: - bigquery_table = "output_table" job_id = utils.dataflow_flex_template_run( job_name=NAME, template_path=flex_template_path, bucket_name=bucket_name, parameters={ "input_subscription": pubsub_subscription, - "output_table": f"{bigquery_dataset}.{bigquery_table}", + "output_table": f"{bigquery_dataset}.output_table", }, ) @@ -96,13 +96,18 @@ def test_flex_template_run( # First, lets wait until the job is running. utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") - # Then, for a while for data to arrive, get processed, and cancel it. + yield job_id + + utils.dataflow_jobs_cancel(job_id) + + +def test_flex_template_run(utils: Utils, run_dataflow_job: str) -> None: + # Wait for a while for data to arrive and get processed. logging.info("Pipeline is running, waiting for messages to arrive") time.sleep(5 * 60) - utils.dataflow_jobs_cancel(job_id) # Check for the output data in BigQuery. - query = f"SELECT * FROM `{bigquery_dataset.replace(':', '.')}.{bigquery_table}`" + query = f"SELECT * FROM `{bigquery_dataset.replace(':', '.')}.output_table`" rows = list(utils.bigquery_query(query)) assert len(rows) > 0 for row in rows: diff --git a/dataflow/gpu-workers/pytorch-minimal/README.md b/dataflow/gpu-workers/pytorch-minimal/README.md index 3ec270791f9..43e24830529 100644 --- a/dataflow/gpu-workers/pytorch-minimal/README.md +++ b/dataflow/gpu-workers/pytorch-minimal/README.md @@ -5,15 +5,12 @@ Make sure you have followed the [Dataflow setup instructions](../../README.md). -Finally, save your resource names in environment variables. 
- -```sh -export PROJECT=$(gcloud config get-value project) -``` - ## Building the Docker image -We use Cloud Build to build the container image for the workers. +We use +[Cloud Build](https://cloud.google.com/build) +to build the container image for the workers and save it in +[Container Registry](https://cloud.google.com/container-registry/). ```sh gcloud builds submit --config build.yaml @@ -21,9 +18,10 @@ gcloud builds submit --config build.yaml ## Running the Dataflow job with GPUs -We use Cloud Build to run the Dataflow job. -We launch the job using the worker image to make sure the job launches -with the same Python version as the workers. +We use Cloud Build to run the [Dataflow](https://cloud.google.com/dataflow) job. + +> ℹ️ We launch the job using the worker image to make sure the job launches +> with the same Python version as the workers and all the dependencies installed. ```sh export REGION="us-central1" diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py index c24ee471e5d..52d6a2c7ab0 100644 --- a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py +++ b/dataflow/gpu-workers/pytorch-minimal/e2e_test.py @@ -40,7 +40,7 @@ def build_image(utils: Utils) -> str: @pytest.fixture(scope="session") -def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: +def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", @@ -54,7 +54,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: ) -def test_pytorch_minimal(utils: Utils, run_job: str) -> None: +def test_pytorch_minimal(utils: Utils, run_dataflow_job: str) -> None: # Wait until the job finishes. status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-workers/tensorflow-landsat/README.md index 7f826e9c00c..dd5b8fadbc7 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/README.md +++ b/dataflow/gpu-workers/tensorflow-landsat/README.md @@ -7,20 +7,14 @@ ## Before you begin Make sure you have followed the -[Dataflow setup instructions](../../README.md), and additionally: - -* Use or [create a Cloud Storage bucket](https://console.cloud.google.com/storage/create-bucket). - -Finally, save your resource names in environment variables. - -```sh -export PROJECT=$(gcloud config get-value project) -export BUCKET="my-bucket-name" -``` +[Dataflow setup instructions](../../README.md). ## Building the Docker image -We use Cloud Build to build the container image for the workers. +We use +[Cloud Build](https://cloud.google.com/build) +to build the container image for the workers and save it in +[Container Registry](https://cloud.google.com/container-registry/). ```sh gcloud builds submit --config build.yaml @@ -28,9 +22,10 @@ gcloud builds submit --config build.yaml ## Running the Dataflow job with GPUs -We use Cloud Build to run the Dataflow job. -We launch the job using the worker image to make sure the job launches -with the same Python version as the workers. +We use Cloud Build to run the [Dataflow](https://cloud.google.com/dataflow) job. + +> ℹ️ We launch the job using the worker image to make sure the job launches +> with the same Python version as the workers and all the dependencies installed. 
```sh export OUTPUT_PATH="gs://$BUCKET/samples/dataflow/landsat/output-images/" diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py index 3576e3b781f..972fe627f42 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py @@ -41,7 +41,7 @@ def build_image(utils: Utils) -> str: @pytest.fixture(scope="session") -def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: +def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", @@ -56,7 +56,9 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: ) -def test_tensorflow_landsat(utils: Utils, bucket_name: str, run_job: str) -> None: +def test_tensorflow_landsat( + utils: Utils, bucket_name: str, run_dataflow_job: str +) -> None: # Wait until the job finishes. timeout = 30 * 60 # 30 minutes status = utils.dataflow_jobs_wait( diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-workers/tensorflow-minimal/README.md index a645dbb411b..debd86b0e91 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/README.md +++ b/dataflow/gpu-workers/tensorflow-minimal/README.md @@ -5,15 +5,12 @@ Make sure you have followed the [Dataflow setup instructions](../../README.md). -Finally, save your resource names in environment variables. - -```sh -export PROJECT=$(gcloud config get-value project) -``` - ## Building the Docker image -We use Cloud Build to build the container image for the workers. +We use +[Cloud Build](https://cloud.google.com/build) +to build the container image for the workers and save it in +[Container Registry](https://cloud.google.com/container-registry/). ```sh gcloud builds submit --config build.yaml @@ -21,9 +18,10 @@ gcloud builds submit --config build.yaml ## Running the Dataflow job with GPUs -We use Cloud Build to run the Dataflow job. -We launch the job using the worker image to make sure the job launches -with the same Python version as the workers. +We use Cloud Build to run the [Dataflow](https://cloud.google.com/dataflow) job. + +> ℹ️ We launch the job using the worker image to make sure the job launches +> with the same Python version as the workers and all the dependencies installed. ```sh export REGION="us-central1" diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py index ebac5c00dd7..6c890550d68 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py +++ b/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py @@ -40,7 +40,7 @@ def build_image(utils: Utils) -> str: @pytest.fixture(scope="session") -def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: +def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( config="run.yaml", @@ -54,7 +54,7 @@ def run_job(utils: Utils, bucket_name: str, build_image: str) -> str: ) -def test_tensorflow_minimal(utils: Utils, run_job: str) -> None: +def test_tensorflow_minimal(utils: Utils, run_dataflow_job: str) -> None: # Wait until the job finishes. 
status = utils.dataflow_jobs_wait(job_name=utils.hyphen_name(NAME)) assert status == "JOB_STATE_DONE", f"Dataflow pipeline finished in {status} status" From 688865f804e1119f8be1e2eca172fe0de3397c2f Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 21 Jun 2021 12:04:27 -0700 Subject: [PATCH 63/87] add missing fixture --- dataflow/flex-templates/streaming_beam/e2e_test.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 6faec89b8e5..752fadf323b 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -101,7 +101,9 @@ def run_dataflow_job( utils.dataflow_jobs_cancel(job_id) -def test_flex_template_run(utils: Utils, run_dataflow_job: str) -> None: +def test_flex_template_run( + utils: Utils, bigquery_dataset: str, run_dataflow_job: str +) -> None: # Wait for a while for data to arrive and get processed. logging.info("Pipeline is running, waiting for messages to arrive") time.sleep(5 * 60) From 230fa94d137f3267dacd3fb900d3dd6552a74af1 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 09:45:11 -0700 Subject: [PATCH 64/87] updated timeout --- dataflow/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 6dec741f172..41b3aea8490 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -284,7 +284,7 @@ def dataflow_jobs_get( job_id: Optional[str] = None, job_name: Optional[str] = None, project: str = PROJECT, - list_page_size=30, + list_page_size: int = 30, ) -> Optional[Dict[str, Any]]: from googleapiclient.discovery import build @@ -343,7 +343,7 @@ def dataflow_jobs_wait( f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" ) status = None - for _ in range(0, timeout_sec + 1, poll_interval_sec): + for _ in range(0, timeout_sec, poll_interval_sec): try: job = Utils.dataflow_jobs_get( job_id=job_id, From a34132759a0edeb7f60f57da0b483c3bcb57335a Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 18:34:30 +0000 Subject: [PATCH 65/87] containerize gpu tests --- dataflow/conftest.py | 387 +++++--------- .../flex-templates/streaming_beam/e2e_test.py | 41 +- dataflow/gpu-workers/conftest.py | 503 ++++++++++++++++++ 3 files changed, 651 insertions(+), 280 deletions(-) create mode 100644 dataflow/gpu-workers/conftest.py diff --git a/dataflow/conftest.py b/dataflow/conftest.py index 41b3aea8490..13314bf86dd 100644 --- a/dataflow/conftest.py +++ b/dataflow/conftest.py @@ -13,15 +13,12 @@ from dataclasses import dataclass import itertools import json -import logging import multiprocessing as mp import os -import re -import platform import subprocess import sys import time -from typing import Any, Callable, Dict, Iterable, Optional, Union +from typing import Any, Callable, Dict, Iterable, Optional import uuid import pytest @@ -29,90 +26,65 @@ # Default options. 
UUID = uuid.uuid4().hex[0:6] PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] -REGION = "us-central1" +REGION = "us-west1" +ZONE = "us-west1-b" RETRY_MAX_TIME = 5 * 60 # 5 minutes in seconds -HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+") -UNDERSCORE_NAME_RE = re.compile(r"[^\w\d_]+") - -PYTHON_VERSION = "".join(platform.python_version_tuple()[0:2]) - @dataclass class Utils: uuid: str = UUID project: str = PROJECT region: str = REGION + zone: str = ZONE @staticmethod - def hyphen_name(name: str) -> str: - unique_name = f"{name}-py{PYTHON_VERSION}-{UUID}" - return HYPHEN_NAME_RE.sub("-", unique_name) - - @staticmethod - def underscore_name(name: str) -> str: - return UNDERSCORE_NAME_RE.sub("_", Utils.hyphen_name(name)) - - @staticmethod - def storage_bucket(name: str) -> str: + def storage_bucket(bucket_name: str) -> str: from google.cloud import storage storage_client = storage.Client() - bucket = storage_client.create_bucket(Utils.hyphen_name(name)) + bucket_unique_name = f"{bucket_name}-{UUID}" + bucket = storage_client.create_bucket(bucket_unique_name) - logging.info(f"Created storage_bucket: {bucket.name}") - yield bucket.name - - # Print all the objects in the bucket before deleting for debugging. - logging.info(f"Deleting bucket {bucket.name} with the following contents:") - total_files = 0 - total_size = 0 - for blob in bucket.list_blobs(): - logging.info(f" - {blob.name} ({blob.size} bytes)") - total_files += 1 - total_size += blob.size - logging.info(f"Total {total_files} files ({total_size} bytes)") + print(f"storage_bucket: {bucket_unique_name}") + yield bucket_unique_name bucket.delete(force=True) - logging.info(f"Deleted storage_bucket: {bucket.name}") @staticmethod - def bigquery_dataset(name: str, project: str = PROJECT) -> str: + def bigquery_dataset(dataset_name: str, project: str = PROJECT) -> str: from google.cloud import bigquery bigquery_client = bigquery.Client() - dataset = bigquery_client.create_dataset( - bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") + bigquery.Dataset(f"{project}.{dataset_name.replace('-', '_')}_{UUID}") ) - logging.info(f"Created bigquery_dataset: {dataset.full_dataset_id}") + print(f"bigquery_dataset: {dataset.full_dataset_id}") yield dataset.full_dataset_id bigquery_client.delete_dataset( dataset.full_dataset_id.replace(":", "."), delete_contents=True ) - logging.info(f"Deleted bigquery_dataset: {dataset.full_dataset_id}") @staticmethod def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: from google.cloud import bigquery bigquery_client = bigquery.Client() - logging.info(f"Bigquery query: {query}") for row in bigquery_client.query(query): yield dict(row) @staticmethod - def pubsub_topic(name: str, project: str = PROJECT) -> str: + def pubsub_topic(topic_name: str, project: str = PROJECT) -> str: from google.cloud import pubsub publisher_client = pubsub.PublisherClient() - topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) + topic_path = publisher_client.topic_path(project, f"{topic_name}-{UUID}") topic = publisher_client.create_topic(topic_path) - logging.info(f"Created pubsub_topic: {topic.name}") + print(f"pubsub_topic: {topic.name}") yield topic.name # Due to the pinned library dependencies in apache-beam, client @@ -120,25 +92,24 @@ def pubsub_topic(name: str, project: str = PROJECT) -> str: # We use gcloud for a workaround. 
See also: # https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4492 cmd = ["gcloud", "pubsub", "--project", project, "topics", "delete", topic.name] - logging.info(f"{cmd}") + print(cmd) subprocess.run(cmd, check=True) - logging.info(f"Deleted pubsub_topic: {topic.name}") @staticmethod def pubsub_subscription( topic_path: str, - name: str, + subscription_name: str, project: str = PROJECT, ) -> str: from google.cloud import pubsub subscriber = pubsub.SubscriberClient() subscription_path = subscriber.subscription_path( - project, Utils.hyphen_name(name) + project, f"{subscription_name}-{UUID}" ) subscription = subscriber.create_subscription(subscription_path, topic_path) - logging.info(f"Created pubsub_subscription: {subscription.name}") + print(f"pubsub_subscription: {subscription.name}") yield subscription.name # Due to the pinned library dependencies in apache-beam, client @@ -154,9 +125,8 @@ def pubsub_subscription( "delete", subscription.name, ] - logging.info(f"{cmd}") + print(cmd) subprocess.run(cmd, check=True) - logging.info(f"Deleted pubsub_subscription: {subscription.name}") @staticmethod def pubsub_publisher( @@ -176,229 +146,128 @@ def _infinite_publish_job() -> None: time.sleep(sleep_sec) # Start a subprocess in the background to do the publishing. - logging.info(f"Starting publisher on {topic_path}") + print(f"Starting publisher on {topic_path}") p = mp.Process(target=_infinite_publish_job) p.start() yield p.is_alive() # For cleanup, terminate the background process. - logging.info("Stopping publisher") + print("Stopping publisher") p.join(timeout=0) p.terminate() @staticmethod - def cloud_build_submit( - image_name: Optional[str] = None, - config: Optional[str] = None, - source: str = ".", - substitutions: Optional[Dict[str, str]] = None, + def container_image( + image_path: str, project: str = PROJECT, - ) -> None: - """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" + tag: str = "latest", + ) -> str: + image_name = f"gcr.io/{project}/{image_path}-{UUID}:{tag}" cmd = ["gcloud", "auth", "configure-docker"] - logging.info(f"{cmd}") + print(cmd) + subprocess.run(cmd, check=True) + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--tag={image_name}", + ".", + ] + print(cmd) subprocess.run(cmd, check=True) - if substitutions: - cmd_substitutions = [ - f"--substitutions={','.join([k + '=' + v for k, v in substitutions.items()])}" - ] - else: - cmd_substitutions = [] - - if config: - with open(config) as f: - cmd = [ - "gcloud", - "builds", - "submit", - f"--project={project}", - f"--config={config}", - *cmd_substitutions, - source, - ] - logging.info(f"{cmd}") - subprocess.run(cmd, check=True) - logging.info(f"Cloud build finished successfully: {config}") - yield f.read() - elif image_name: - cmd = [ - "gcloud", - "builds", - "submit", - f"--project={project}", - f"--tag=gcr.io/{project}/{image_name}:{UUID}", - *cmd_substitutions, - source, - ] - logging.info(f"{cmd}") - subprocess.run(cmd, check=True) - logging.info(f"Created image: gcr.io/{project}/{image_name}:{UUID}") - yield f"{image_name}:{UUID}" - else: - raise ValueError("must specify either `config` or `image_name`") - - if image_name: - cmd = [ - "gcloud", - "container", - "images", - "delete", - f"gcr.io/{project}/{image_name}:{UUID}", - f"--project={project}", - "--force-delete-tags", - "--quiet", - ] - logging.info(f"{cmd}") - subprocess.run(cmd, check=True) - logging.info(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") + 
print(f"container_image: {image_name}") + yield image_name + + cmd = [ + "gcloud", + "container", + "images", + "delete", + image_name, + f"--project={project}", + "--quiet", + ] + print(cmd) + subprocess.run(cmd, check=True) @staticmethod - def dataflow_jobs_list( - project: str = PROJECT, page_size: int = 30 - ) -> Iterable[dict]: + def dataflow_job_id_from_job_name( + job_name: str, + project: str = PROJECT, + ) -> Optional[str]: from googleapiclient.discovery import build dataflow = build("dataflow", "v1b3") - response = {"nextPageToken": None} - while "nextPageToken" in response: - # For more info see: - # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list - request = ( - dataflow.projects() - .jobs() - .list( - projectId=project, - pageToken=response["nextPageToken"], - pageSize=page_size, - ) + # Only return the 50 most recent results - our job is likely to be in here. + # If the job is not found, first try increasing this number.[]''job_id + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list + jobs_request = ( + dataflow.projects() + .jobs() + .list( + projectId=project, + filter="ACTIVE", + pageSize=50, ) - response = request.execute() - for job in response["jobs"]: - yield job + ) + response = jobs_request.execute() + + # Search for the job in the list that has our name (names are unique) + for job in response["jobs"]: + if job["name"] == job_name: + return job["id"] + return None @staticmethod - def dataflow_jobs_get( - job_id: Optional[str] = None, - job_name: Optional[str] = None, + def dataflow_jobs_wait( + job_id: str, project: str = PROJECT, - list_page_size: int = 30, - ) -> Optional[Dict[str, Any]]: + status: str = "JOB_STATE_RUNNING", + ) -> bool: from googleapiclient.discovery import build dataflow = build("dataflow", "v1b3") - if job_id: - # For more info see: - # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/get - request = ( - dataflow.projects() - .jobs() - .get( - projectId=project, - jobId=job_id, - view="JOB_VIEW_SUMMARY", - ) - ) - # If the job is not found, this throws an HttpError exception. - job = request.execute() - logging.info(f"Found Dataflow job: {job}") - return job - - elif job_name: - for job in Utils.dataflow_jobs_list(project, list_page_size): - if job["name"] == job_name: - logging.info(f"Found Dataflow job: {job}") - return job - raise ValueError(f"Dataflow job not found: job_name={job_name}") + sleep_time_seconds = 30 + max_sleep_time = 10 * 60 - else: - raise ValueError("must specify either `job_id` or `job_name`") - - @staticmethod - def dataflow_jobs_wait( - job_id: Optional[str] = None, - job_name: Optional[str] = None, - project: str = PROJECT, - region: str = REGION, - until_status: str = "JOB_STATE_DONE", - timeout_sec: str = 30 * 60, - poll_interval_sec=60, - list_page_size=100, - ) -> Optional[str]: - """For a list of all the valid states: - https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState - """ - - # Wait until we reach the desired status, or the job finished in some way. 
- target_status = { - until_status, - "JOB_STATE_DONE", - "JOB_STATE_FAILED", - "JOB_STATE_CANCELLED", - } - logging.info( - f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" - ) - status = None - for _ in range(0, timeout_sec, poll_interval_sec): + print(f"Waiting for Dataflow job ID: {job_id} (until status {status})") + for _ in range(0, max_sleep_time, sleep_time_seconds): try: - job = Utils.dataflow_jobs_get( - job_id=job_id, - job_name=job_name, - project=project, - list_page_size=list_page_size, - ) - status = job["currentState"] - if status in target_status: - logging.info( - f"Job status {status} in {target_status}, done waiting" + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/get + jobs_request = ( + dataflow.projects() + .jobs() + .get( + projectId=project, + jobId=job_id, + view="JOB_VIEW_SUMMARY", ) - return status - elif status == "JOB_STATE_FAILED": - raise RuntimeError( - "Dataflow job failed:\n" - f"https://console.cloud.google.com/dataflow/jobs/{region}/{job_id}?project={project}" - ) - logging.info( - f"Job status {status} not in {target_status}, retrying in {poll_interval_sec} seconds" ) - except Exception as e: - logging.exception(e) - time.sleep(poll_interval_sec) - if status is None: - raise RuntimeError( - f"Dataflow job not found: timeout_sec={timeout_sec}, target_status={target_status}, job_id={job_id}, job_name={job_name}" - ) - else: - raise RuntimeError( - f"Dataflow job finished in status {status} but expected {target_status}: job_id={job_id}, job_name={job_name}" - ) + response = jobs_request.execute() + print(response) + if response["currentState"] == status: + return True + except Exception: + pass + time.sleep(sleep_time_seconds) + return False @staticmethod - def dataflow_jobs_cancel( + def dataflow_jobs_cancel_by_job_id( job_id: str, project: str = PROJECT, region: str = REGION ) -> None: - logging.info(f"Cancelling Dataflow job ID: {job_id}") + print(f"Canceling Dataflow job ID: {job_id}") # We get an error using the googleapiclient.discovery APIs, probably # due to incompatible dependencies with apache-beam. # We use gcloud instead to cancel the job. - # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/drain - cmd = [ - "gcloud", - f"--project={project}", - "dataflow", - "jobs", - "drain", - job_id, - f"--region={region}", - ] - logging.info(f"{cmd}") - subprocess.run(cmd, check=True) - - # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/cancel + # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/cancel cmd = [ "gcloud", f"--project={project}", @@ -408,14 +277,22 @@ def dataflow_jobs_cancel( job_id, f"--region={region}", ] - logging.info(f"{cmd}") subprocess.run(cmd, check=True) - logging.info(f"Cancelled Dataflow job: {job_id}") + + @staticmethod + def dataflow_jobs_cancel_by_job_name( + job_name: str, project: str = PROJECT, region: str = REGION + ) -> None: + # To cancel a dataflow job, we need its ID, not its name. + # If the job is not found, job_id will be None.
+ job_id = Utils.dataflow_job_id_from_job_name(job_name, project) + if job_id is not None: + Utils.dataflow_jobs_cancel_by_job_id(job_id, project, region) @staticmethod def dataflow_flex_template_build( bucket_name: str, - image_name: str, + template_image: str, metadata_file: str, project: str = PROJECT, template_file: str = "template.json", @@ -429,14 +306,14 @@ def dataflow_flex_template_build( "build", template_gcs_path, f"--project={project}", - f"--image=gcr.io/{project}/{image_name}", + f"--image={template_image}", "--sdk-language=PYTHON", f"--metadata-file={metadata_file}", ] - logging.info(f"{cmd}") + print(cmd) subprocess.run(cmd, check=True) - logging.info(f"dataflow_flex_template_build: {template_gcs_path}") + print(f"dataflow_flex_template_build: {template_gcs_path}") yield template_gcs_path # The template file gets deleted when we delete the bucket. @@ -452,8 +329,8 @@ def dataflow_flex_template_run( import yaml # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run - unique_job_name = Utils.hyphen_name(job_name) - logging.info(f"dataflow_job_name: {unique_job_name}") + unique_job_name = f"{job_name}-{UUID}" + print(f"dataflow_job_name: {unique_job_name}") cmd = [ "gcloud", "dataflow", @@ -463,15 +340,14 @@ def dataflow_flex_template_run( f"--template-file-gcs-location={template_path}", f"--project={project}", f"--region={region}", - f"--staging-location=gs://{bucket_name}/staging", ] + [ f"--parameters={name}={value}" for name, value in { **parameters, + "temp_location": f"gs://{bucket_name}/temp", }.items() ] - logging.info(f"{cmd}") - + print(cmd) try: # The `capture_output` option was added in Python 3.7, so we must # pass the `stdout` and `stderr` options explicitly to support 3.6. @@ -481,23 +357,22 @@ def dataflow_flex_template_run( ) stdout = p.stdout.decode("utf-8") stderr = p.stderr.decode("utf-8") - logging.info(f"Launched Dataflow Flex Template job: {unique_job_name}") + print(f"Launched Dataflow Flex Template job: {unique_job_name}") except subprocess.CalledProcessError as e: - logging.info(e, file=sys.stderr) + print(e, file=sys.stderr) stdout = e.stdout.decode("utf-8") stderr = e.stderr.decode("utf-8") finally: - logging.info("--- stderr ---") - logging.info(stderr) - logging.info("--- stdout ---") - logging.info(stdout) - logging.info("--- end ---") + print("--- stderr ---") + print(stderr) + print("--- stdout ---") + print(stdout) + print("--- end ---") return yaml.safe_load(stdout)["job"]["id"] @pytest.fixture(scope="session") def utils() -> Utils: - logging.getLogger().setLevel(logging.INFO) - logging.info(f"Test unique identifier: {UUID}") + print(f"Test unique identifier: {UUID}") subprocess.run(["gcloud", "version"]) return Utils() diff --git a/dataflow/flex-templates/streaming_beam/e2e_test.py b/dataflow/flex-templates/streaming_beam/e2e_test.py index 752fadf323b..e642306ed4b 100644 --- a/dataflow/flex-templates/streaming_beam/e2e_test.py +++ b/dataflow/flex-templates/streaming_beam/e2e_test.py @@ -11,18 +11,19 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. import json -import logging import time +# `conftest` cannot be imported when running in `nox`, but we still +# try to import it for the autocomplete when writing the tests.
from conftest import Utils except ModuleNotFoundError: - Utils = None + from typing import Any + + Utils = Any import pytest -NAME = "dataflow/flex-templates/streaming-beam" +NAME = "dataflow-flex-templates-streaming-beam" @pytest.fixture(scope="session") @@ -60,56 +61,48 @@ def pubsub_publisher(utils: Utils, pubsub_topic: str) -> bool: @pytest.fixture(scope="session") def flex_template_image(utils: Utils) -> str: - yield from utils.cloud_build_submit(NAME) + yield from utils.container_image(NAME) @pytest.fixture(scope="session") def flex_template_path(utils: Utils, bucket_name: str, flex_template_image: str) -> str: yield from utils.dataflow_flex_template_build( bucket_name=bucket_name, - image_name=flex_template_image, + template_image=flex_template_image, metadata_file="metadata.json", ) -@pytest.fixture(scope="session") -def run_dataflow_job( +def test_flex_template_run( utils: Utils, bucket_name: str, pubsub_publisher: str, pubsub_subscription: str, flex_template_path: str, bigquery_dataset: str, -) -> str: +) -> None: + bigquery_table = "output_table" job_id = utils.dataflow_flex_template_run( job_name=NAME, template_path=flex_template_path, bucket_name=bucket_name, parameters={ "input_subscription": pubsub_subscription, - "output_table": f"{bigquery_dataset}.output_table", + "output_table": f"{bigquery_dataset}.{bigquery_table}", }, ) # Since this is a streaming job, it will never finish running. # First, lets wait until the job is running. - utils.dataflow_jobs_wait(job_id, until_status="JOB_STATE_RUNNING") - - yield job_id + utils.dataflow_jobs_wait(job_id) - utils.dataflow_jobs_cancel(job_id) - - -def test_flex_template_run( - utils: Utils, bigquery_dataset: str, run_dataflow_job: str -) -> None: - # Wait for a while for data to arrive and get processed. - logging.info("Pipeline is running, waiting for messages to arrive") - time.sleep(5 * 60) + # Then, wait a minute for data to arrive, get processed, and cancel it. + time.sleep(60) + utils.dataflow_jobs_cancel_by_job_id(job_id) # Check for the output data in BigQuery. - query = f"SELECT * FROM `{bigquery_dataset.replace(':', '.')}.output_table`" + query = f"SELECT * FROM {bigquery_dataset.replace(':', '.')}.{bigquery_table}" rows = list(utils.bigquery_query(query)) assert len(rows) > 0 for row in rows: diff --git a/dataflow/gpu-workers/conftest.py b/dataflow/gpu-workers/conftest.py new file mode 100644 index 00000000000..41b3aea8490 --- /dev/null +++ b/dataflow/gpu-workers/conftest.py @@ -0,0 +1,503 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + +from dataclasses import dataclass +import itertools +import json +import logging +import multiprocessing as mp +import os +import re +import platform +import subprocess +import sys +import time +from typing import Any, Callable, Dict, Iterable, Optional, Union +import uuid + +import pytest + +# Default options. 
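
The constants defined next give every test session a short unique suffix and sanitize resource names for the services that use them. A rough illustration of the intent, using the same regular expressions (the sample name and suffix are made up):

```python
import re
import uuid

suffix = uuid.uuid4().hex[:6]  # e.g. "a1b2c3", unique per test session

# Bucket and Dataflow job names allow hyphens but not slashes or underscores;
# BigQuery dataset names allow underscores but not hyphens.
hyphen_re = re.compile(r"[^\w\d-]+")
underscore_re = re.compile(r"[^\w\d_]+")

job_name = hyphen_re.sub("-", f"dataflow/gpu-examples/pytorch-minimal-{suffix}")
dataset_name = underscore_re.sub("_", job_name)

print(job_name)      # dataflow-gpu-examples-pytorch-minimal-a1b2c3
print(dataset_name)  # dataflow_gpu_examples_pytorch_minimal_a1b2c3
```
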
+UUID = uuid.uuid4().hex[0:6] +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] +REGION = "us-central1" + +RETRY_MAX_TIME = 5 * 60 # 5 minutes in seconds + +HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+") +UNDERSCORE_NAME_RE = re.compile(r"[^\w\d_]+") + +PYTHON_VERSION = "".join(platform.python_version_tuple()[0:2]) + + +@dataclass +class Utils: + uuid: str = UUID + project: str = PROJECT + region: str = REGION + + @staticmethod + def hyphen_name(name: str) -> str: + unique_name = f"{name}-py{PYTHON_VERSION}-{UUID}" + return HYPHEN_NAME_RE.sub("-", unique_name) + + @staticmethod + def underscore_name(name: str) -> str: + return UNDERSCORE_NAME_RE.sub("_", Utils.hyphen_name(name)) + + @staticmethod + def storage_bucket(name: str) -> str: + from google.cloud import storage + + storage_client = storage.Client() + bucket = storage_client.create_bucket(Utils.hyphen_name(name)) + + logging.info(f"Created storage_bucket: {bucket.name}") + yield bucket.name + + # Print all the objects in the bucket before deleting for debugging. + logging.info(f"Deleting bucket {bucket.name} with the following contents:") + total_files = 0 + total_size = 0 + for blob in bucket.list_blobs(): + logging.info(f" - {blob.name} ({blob.size} bytes)") + total_files += 1 + total_size += blob.size + logging.info(f"Total {total_files} files ({total_size} bytes)") + + bucket.delete(force=True) + logging.info(f"Deleted storage_bucket: {bucket.name}") + + @staticmethod + def bigquery_dataset(name: str, project: str = PROJECT) -> str: + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + + dataset = bigquery_client.create_dataset( + bigquery.Dataset(f"{project}.{Utils.underscore_name(name)}") + ) + + logging.info(f"Created bigquery_dataset: {dataset.full_dataset_id}") + yield dataset.full_dataset_id + + bigquery_client.delete_dataset( + dataset.full_dataset_id.replace(":", "."), delete_contents=True + ) + logging.info(f"Deleted bigquery_dataset: {dataset.full_dataset_id}") + + @staticmethod + def bigquery_query(query: str) -> Iterable[Dict[str, Any]]: + from google.cloud import bigquery + + bigquery_client = bigquery.Client() + logging.info(f"Bigquery query: {query}") + for row in bigquery_client.query(query): + yield dict(row) + + @staticmethod + def pubsub_topic(name: str, project: str = PROJECT) -> str: + from google.cloud import pubsub + + publisher_client = pubsub.PublisherClient() + topic_path = publisher_client.topic_path(project, Utils.hyphen_name(name)) + topic = publisher_client.create_topic(topic_path) + + logging.info(f"Created pubsub_topic: {topic.name}") + yield topic.name + + # Due to the pinned library dependencies in apache-beam, client + # library throws an error upon deletion. + # We use gcloud for a workaround. 
See also: + # https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4492 + cmd = ["gcloud", "pubsub", "--project", project, "topics", "delete", topic.name] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Deleted pubsub_topic: {topic.name}") + + @staticmethod + def pubsub_subscription( + topic_path: str, + name: str, + project: str = PROJECT, + ) -> str: + from google.cloud import pubsub + + subscriber = pubsub.SubscriberClient() + subscription_path = subscriber.subscription_path( + project, Utils.hyphen_name(name) + ) + subscription = subscriber.create_subscription(subscription_path, topic_path) + + logging.info(f"Created pubsub_subscription: {subscription.name}") + yield subscription.name + + # Due to the pinned library dependencies in apache-beam, client + # library throws an error upon deletion. + # We use gcloud for a workaround. See also: + # https://github.com/GoogleCloudPlatform/python-docs-samples/issues/4492 + cmd = [ + "gcloud", + "pubsub", + "--project", + project, + "subscriptions", + "delete", + subscription.name, + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Deleted pubsub_subscription: {subscription.name}") + + @staticmethod + def pubsub_publisher( + topic_path: str, + new_msg: Callable[[int], str] = lambda i: json.dumps( + {"id": i, "content": f"message {i}"} + ), + sleep_sec: int = 1, + ) -> bool: + from google.cloud import pubsub + + def _infinite_publish_job() -> None: + publisher_client = pubsub.PublisherClient() + for i in itertools.count(): + msg = new_msg(i) + publisher_client.publish(topic_path, msg.encode("utf-8")).result() + time.sleep(sleep_sec) + + # Start a subprocess in the background to do the publishing. + logging.info(f"Starting publisher on {topic_path}") + p = mp.Process(target=_infinite_publish_job) + p.start() + + yield p.is_alive() + + # For cleanup, terminate the background process. 
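
All of the fixtures in this file follow the same create, yield, clean-up shape. A stripped-down sketch of the pattern (the bucket name prefix is a placeholder):

```python
import uuid

import pytest
from google.cloud import storage


@pytest.fixture(scope="session")
def bucket_name() -> str:
    # Setup: create the resource once per test session.
    client = storage.Client()
    bucket = client.create_bucket(f"my-test-bucket-{uuid.uuid4().hex[:6]}")

    # Everything before the yield runs at setup; everything after runs at
    # teardown, even when the tests that used the fixture failed.
    yield bucket.name

    # Teardown: force=True also deletes any objects left in the bucket.
    bucket.delete(force=True)
```
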
+ logging.info("Stopping publisher") + p.join(timeout=0) + p.terminate() + + @staticmethod + def cloud_build_submit( + image_name: Optional[str] = None, + config: Optional[str] = None, + source: str = ".", + substitutions: Optional[Dict[str, str]] = None, + project: str = PROJECT, + ) -> None: + """Sends a Cloud Build job, if an image_name is provided it will be deleted at teardown.""" + cmd = ["gcloud", "auth", "configure-docker"] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + + if substitutions: + cmd_substitutions = [ + f"--substitutions={','.join([k + '=' + v for k, v in substitutions.items()])}" + ] + else: + cmd_substitutions = [] + + if config: + with open(config) as f: + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--config={config}", + *cmd_substitutions, + source, + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Cloud build finished successfully: {config}") + yield f.read() + elif image_name: + cmd = [ + "gcloud", + "builds", + "submit", + f"--project={project}", + f"--tag=gcr.io/{project}/{image_name}:{UUID}", + *cmd_substitutions, + source, + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Created image: gcr.io/{project}/{image_name}:{UUID}") + yield f"{image_name}:{UUID}" + else: + raise ValueError("must specify either `config` or `image_name`") + + if image_name: + cmd = [ + "gcloud", + "container", + "images", + "delete", + f"gcr.io/{project}/{image_name}:{UUID}", + f"--project={project}", + "--force-delete-tags", + "--quiet", + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Deleted image: gcr.io/{project}/{image_name}:{UUID}") + + @staticmethod + def dataflow_jobs_list( + project: str = PROJECT, page_size: int = 30 + ) -> Iterable[dict]: + from googleapiclient.discovery import build + + dataflow = build("dataflow", "v1b3") + + response = {"nextPageToken": None} + while "nextPageToken" in response: + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/list + request = ( + dataflow.projects() + .jobs() + .list( + projectId=project, + pageToken=response["nextPageToken"], + pageSize=page_size, + ) + ) + response = request.execute() + for job in response["jobs"]: + yield job + + @staticmethod + def dataflow_jobs_get( + job_id: Optional[str] = None, + job_name: Optional[str] = None, + project: str = PROJECT, + list_page_size: int = 30, + ) -> Optional[Dict[str, Any]]: + from googleapiclient.discovery import build + + dataflow = build("dataflow", "v1b3") + + if job_id: + # For more info see: + # https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs/get + request = ( + dataflow.projects() + .jobs() + .get( + projectId=project, + jobId=job_id, + view="JOB_VIEW_SUMMARY", + ) + ) + # If the job is not found, this throws an HttpError exception. 
+ job = request.execute() + logging.info(f"Found Dataflow job: {job}") + return job + + elif job_name: + for job in Utils.dataflow_jobs_list(project, list_page_size): + if job["name"] == job_name: + logging.info(f"Found Dataflow job: {job}") + return job + raise ValueError(f"Dataflow job not found: job_name={job_name}") + + else: + raise ValueError("must specify either `job_id` or `job_name`") + + @staticmethod + def dataflow_jobs_wait( + job_id: Optional[str] = None, + job_name: Optional[str] = None, + project: str = PROJECT, + region: str = REGION, + until_status: str = "JOB_STATE_DONE", + timeout_sec: str = 30 * 60, + poll_interval_sec=60, + list_page_size=100, + ) -> Optional[str]: + """For a list of all the valid states: + https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState + """ + + # Wait until we reach the desired status, or the job finished in some way. + target_status = { + until_status, + "JOB_STATE_DONE", + "JOB_STATE_FAILED", + "JOB_STATE_CANCELLED", + } + logging.info( + f"Waiting for Dataflow job until {target_status}: job_id={job_id}, job_name={job_name}" + ) + status = None + for _ in range(0, timeout_sec, poll_interval_sec): + try: + job = Utils.dataflow_jobs_get( + job_id=job_id, + job_name=job_name, + project=project, + list_page_size=list_page_size, + ) + status = job["currentState"] + if status in target_status: + logging.info( + f"Job status {status} in {target_status}, done waiting" + ) + return status + elif status == "JOB_STATE_FAILED": + raise RuntimeError( + "Dataflow job failed:\n" + f"https://console.cloud.google.com/dataflow/jobs/{region}/{job_id}?project={project}" + ) + logging.info( + f"Job status {status} not in {target_status}, retrying in {poll_interval_sec} seconds" + ) + except Exception as e: + logging.exception(e) + time.sleep(poll_interval_sec) + if status is None: + raise RuntimeError( + f"Dataflow job not found: timeout_sec={timeout_sec}, target_status={target_status}, job_id={job_id}, job_name={job_name}" + ) + else: + raise RuntimeError( + f"Dataflow job finished in status {status} but expected {target_status}: job_id={job_id}, job_name={job_name}" + ) + + @staticmethod + def dataflow_jobs_cancel( + job_id: str, project: str = PROJECT, region: str = REGION + ) -> None: + logging.info(f"Cancelling Dataflow job ID: {job_id}") + # We get an error using the googleapiclient.discovery APIs, probably + # due to incompatible dependencies with apache-beam. + # We use gcloud instead to cancel the job. 
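
The wait loop in `dataflow_jobs_wait` above is an instance of a generic poll-until-status pattern. Distilled into a self-contained sketch:

```python
import time
from typing import Callable, Optional, Set


def wait_for_status(
    get_status: Callable[[], Optional[str]],
    target_status: Set[str],
    timeout_sec: int = 30 * 60,
    poll_interval_sec: int = 60,
) -> Optional[str]:
    """Poll until get_status() returns a value in target_status, or time out."""
    deadline = time.monotonic() + timeout_sec
    while time.monotonic() < deadline:
        status = get_status()
        if status in target_status:
            return status
        time.sleep(poll_interval_sec)
    return None  # Timed out without reaching a target status.
```

The real helper layers the Dataflow specifics on top of this: terminal states such as `JOB_STATE_DONE`, `JOB_STATE_FAILED`, and `JOB_STATE_CANCELLED` always count as targets, and a failed job raises a `RuntimeError` with a link to the job in the Cloud Console.
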
+ # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/drain + cmd = [ + "gcloud", + f"--project={project}", + "dataflow", + "jobs", + "drain", + job_id, + f"--region={region}", + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + + # https://cloud.google.com/sdk/gcloud/reference/dataflow/jobs/cancel + cmd = [ + "gcloud", + f"--project={project}", + "dataflow", + "jobs", + "cancel", + job_id, + f"--region={region}", + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + logging.info(f"Cancelled Dataflow job: {job_id}") + + @staticmethod + def dataflow_flex_template_build( + bucket_name: str, + image_name: str, + metadata_file: str, + project: str = PROJECT, + template_file: str = "template.json", + ) -> str: + # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/build + template_gcs_path = f"gs://{bucket_name}/{template_file}" + cmd = [ + "gcloud", + "dataflow", + "flex-template", + "build", + template_gcs_path, + f"--project={project}", + f"--image=gcr.io/{project}/{image_name}", + "--sdk-language=PYTHON", + f"--metadata-file={metadata_file}", + ] + logging.info(f"{cmd}") + subprocess.run(cmd, check=True) + + logging.info(f"dataflow_flex_template_build: {template_gcs_path}") + yield template_gcs_path + # The template file gets deleted when we delete the bucket. + + @staticmethod + def dataflow_flex_template_run( + job_name: str, + template_path: str, + bucket_name: str, + parameters: Dict[str, str] = {}, + project: str = PROJECT, + region: str = REGION, + ) -> str: + import yaml + + # https://cloud.google.com/sdk/gcloud/reference/dataflow/flex-template/run + unique_job_name = Utils.hyphen_name(job_name) + logging.info(f"dataflow_job_name: {unique_job_name}") + cmd = [ + "gcloud", + "dataflow", + "flex-template", + "run", + unique_job_name, + f"--template-file-gcs-location={template_path}", + f"--project={project}", + f"--region={region}", + f"--staging-location=gs://{bucket_name}/staging", + ] + [ + f"--parameters={name}={value}" + for name, value in { + **parameters, + }.items() + ] + logging.info(f"{cmd}") + + try: + # The `capture_output` option was added in Python 3.7, so we must + # pass the `stdout` and `stderr` options explicitly to support 3.6. 
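
For reference, the 3.6-compatible capture used below is equivalent to `capture_output=True` on Python 3.7 and later, and the YAML that gcloud prints parses into a plain dict, which is how `dataflow_flex_template_run` extracts the launched job's ID. A small standalone sketch (the gcloud invocation here is only an example command):

```python
import subprocess

import yaml

# Python 3.6-compatible capture; on 3.7+ this can be capture_output=True.
p = subprocess.run(
    ["gcloud", "version", "--format=yaml"],
    check=True,
    stdout=subprocess.PIPE,
    stderr=subprocess.PIPE,
)

# gcloud output in YAML form loads into a regular Python dict.
info = yaml.safe_load(p.stdout.decode("utf-8"))
print(info)
```
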
+ # https://docs.python.org/3/library/subprocess.html#subprocess.run + p = subprocess.run( + cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + stdout = p.stdout.decode("utf-8") + stderr = p.stderr.decode("utf-8") + logging.info(f"Launched Dataflow Flex Template job: {unique_job_name}") + except subprocess.CalledProcessError as e: + logging.info(e, file=sys.stderr) + stdout = e.stdout.decode("utf-8") + stderr = e.stderr.decode("utf-8") + finally: + logging.info("--- stderr ---") + logging.info(stderr) + logging.info("--- stdout ---") + logging.info(stdout) + logging.info("--- end ---") + return yaml.safe_load(stdout)["job"]["id"] + + +@pytest.fixture(scope="session") +def utils() -> Utils: + logging.getLogger().setLevel(logging.INFO) + logging.info(f"Test unique identifier: {UUID}") + subprocess.run(["gcloud", "version"]) + return Utils() From 33a9f09506952b9f7b71ca8c8e2d447fc9f9e0ed Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 18:36:46 +0000 Subject: [PATCH 66/87] remove unused import --- dataflow/gpu-workers/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/conftest.py b/dataflow/gpu-workers/conftest.py index 41b3aea8490..14c568548af 100644 --- a/dataflow/gpu-workers/conftest.py +++ b/dataflow/gpu-workers/conftest.py @@ -21,7 +21,7 @@ import subprocess import sys import time -from typing import Any, Callable, Dict, Iterable, Optional, Union +from typing import Any, Callable, Dict, Iterable, Optional import uuid import pytest From d9f776aa8cc389ebd4ee3eaa1169e9fab57fb837 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 19:04:37 +0000 Subject: [PATCH 67/87] reverted streaming-beam changes --- dataflow/flex-templates/streaming_beam/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/flex-templates/streaming_beam/requirements.txt b/dataflow/flex-templates/streaming_beam/requirements.txt index 009bc29b6d8..7c934ad8979 100644 --- a/dataflow/flex-templates/streaming_beam/requirements.txt +++ b/dataflow/flex-templates/streaming_beam/requirements.txt @@ -1 +1 @@ -apache-beam[gcp]==2.30.0 +apache-beam[gcp]==2.29.0 From 25303eebcd94791076ede70e91b663b8a36a2937 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 20:34:57 +0000 Subject: [PATCH 68/87] updated image versions --- dataflow/gpu-workers/pytorch-minimal/Dockerfile | 2 +- dataflow/gpu-workers/tensorflow-landsat/Dockerfile | 2 +- dataflow/gpu-workers/tensorflow-landsat/run.yaml | 1 + dataflow/gpu-workers/tensorflow-minimal/Dockerfile | 2 +- dataflow/gpu-workers/tensorflow-minimal/run.yaml | 1 + 5 files changed, 5 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-workers/pytorch-minimal/Dockerfile b/dataflow/gpu-workers/pytorch-minimal/Dockerfile index 0dcc04d017d..94aa381e344 100644 --- a/dataflow/gpu-workers/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-workers/pytorch-minimal/Dockerfile @@ -17,7 +17,7 @@ FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . 
COPY *.py ./ diff --git a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile index cc2d7eba729..7a50a862756 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile +++ b/dataflow/gpu-workers/tensorflow-landsat/Dockerfile @@ -21,7 +21,7 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-workers/tensorflow-landsat/run.yaml index 2b97dd28e48..1ac286736e1 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/run.yaml +++ b/dataflow/gpu-workers/tensorflow-landsat/run.yaml @@ -49,6 +49,7 @@ steps: - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 + - --experiment=no_use_multiple_sdk_containers - --disk_size_gb=50 options: diff --git a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile index ff88332cd41..48b4b390eeb 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile +++ b/dataflow/gpu-workers/tensorflow-minimal/Dockerfile @@ -21,7 +21,7 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.29.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . 
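
The `run.yaml` flags in these hunks ultimately become Beam pipeline options. Roughly the same configuration expressed directly in Python would look like the sketch below; the project and image names are placeholders, and in the samples the GPU type and count come from Cloud Build substitutions.

```python
from apache_beam.options.pipeline_options import PipelineOptions

options = PipelineOptions(
    [
        "--runner=DataflowRunner",
        "--sdk_container_image=gcr.io/my-project/my-image",  # placeholder
        "--experiments=worker_accelerator=type=nvidia-tesla-t4,count=1,install-nvidia-driver",
        "--experiments=use_runner_v2",
        "--experiments=no_use_multiple_sdk_containers",
        "--disk_size_gb=50",
    ]
)
```
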
COPY *.py ./ diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-workers/tensorflow-minimal/run.yaml index 7cb9d389134..320c5359fe6 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/run.yaml +++ b/dataflow/gpu-workers/tensorflow-minimal/run.yaml @@ -46,6 +46,7 @@ steps: - --sdk_container_image=gcr.io/$PROJECT_ID/$_IMAGE - --experiment=worker_accelerator=type:$_GPU_TYPE;count:$_GPU_COUNT;install-nvidia-driver - --experiment=use_runner_v2 + - --experiment=no_use_multiple_sdk_containers - --disk_size_gb=50 options: From fdb2610603e89c4b806a26b504c4a5bb888190b4 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 22:10:08 +0000 Subject: [PATCH 69/87] restore old sample to avoid breaking docs --- dataflow/gpu-workers/.dockerignore | 6 + dataflow/gpu-workers/.gcloudignore | 6 + dataflow/gpu-workers/Dockerfile | 54 +++ dataflow/gpu-workers/README.md | 5 + dataflow/gpu-workers/cloudbuild.yaml | 35 ++ dataflow/gpu-workers/e2e_test.py | 120 +++++++ dataflow/gpu-workers/landsat_view.py | 368 +++++++++++++++++++++ dataflow/gpu-workers/noxfile_config.py | 38 +++ dataflow/gpu-workers/requirements-test.txt | 2 + dataflow/gpu-workers/requirements.txt | 4 + 10 files changed, 638 insertions(+) create mode 100644 dataflow/gpu-workers/.dockerignore create mode 100644 dataflow/gpu-workers/.gcloudignore create mode 100644 dataflow/gpu-workers/Dockerfile create mode 100644 dataflow/gpu-workers/README.md create mode 100644 dataflow/gpu-workers/cloudbuild.yaml create mode 100644 dataflow/gpu-workers/e2e_test.py create mode 100644 dataflow/gpu-workers/landsat_view.py create mode 100644 dataflow/gpu-workers/noxfile_config.py create mode 100644 dataflow/gpu-workers/requirements-test.txt create mode 100644 dataflow/gpu-workers/requirements.txt diff --git a/dataflow/gpu-workers/.dockerignore b/dataflow/gpu-workers/.dockerignore new file mode 100644 index 00000000000..04f5ec66ca6 --- /dev/null +++ b/dataflow/gpu-workers/.dockerignore @@ -0,0 +1,6 @@ +# Ignore files for docker. +.mypy_cache/ +.nox/ +__pycache__/ +env/ +outputs/ diff --git a/dataflow/gpu-workers/.gcloudignore b/dataflow/gpu-workers/.gcloudignore new file mode 100644 index 00000000000..cda483971fd --- /dev/null +++ b/dataflow/gpu-workers/.gcloudignore @@ -0,0 +1,6 @@ +# Ignore files for gcloud like Cloud Build. +.mypy_cache/ +.nox/ +__pycache__/ +env/ +outputs/ diff --git a/dataflow/gpu-workers/Dockerfile b/dataflow/gpu-workers/Dockerfile new file mode 100644 index 00000000000..d9003409717 --- /dev/null +++ b/dataflow/gpu-workers/Dockerfile @@ -0,0 +1,54 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Make sure the CUDA and cuDNN versions are compatible with your TensorFlow version. 
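
A quick way to confirm which CUDA and cuDNN versions a given TensorFlow wheel expects, and whether it can see a GPU at all, is from Python itself. This is a sketch; `tf.sysconfig.get_build_info()` is assumed available, as it is in recent TensorFlow 2.x releases.

```python
import tensorflow as tf

# The CUDA/cuDNN versions this TensorFlow wheel was built against.
build_info = tf.sysconfig.get_build_info()
print(build_info.get("cuda_version"))   # e.g. "11.2"
print(build_info.get("cudnn_version"))  # e.g. "8"

# On a correctly configured GPU worker this lists at least one device.
print(tf.config.list_physical_devices("GPU"))
```
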
+# https://www.tensorflow.org/install/source#gpu +# Check the Nvidia container registry catalog to look at the available Nvidia images: +# https://ngc.nvidia.com/catalog/containers/nvidia:cuda +FROM nvidia/cuda:11.3.1-cudnn8-runtime-ubuntu20.04 + +# The Python version of the Dockerfile MUST match the Python version you use +# to launch the Dataflow job. +ARG python_version=3.8 + +WORKDIR /root + +# Copy the Apache Beam worker files and the requirements.txt file. +COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam +COPY requirements.txt . + +# Update PATH so we find our new Conda and Python installations. +ENV PATH=/opt/python/bin:/opt/conda/bin:$PATH + +RUN apt-get update \ + && apt-get upgrade -y \ + && apt-get install -y wget \ + && rm -rf /var/lib/apt/lists/* \ + # The nvidia image doesn't come with Python pre-installed. + # We use Miniconda to install the Python version of our choice. + && wget -q https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && sh Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \ + && rm Miniconda3-latest-Linux-x86_64.sh \ + # Create a new Python environment and install our requirements. + # We don't need to update $PATH since /usr/local is already in $PATH. + && conda create -y -p /opt/python python=$python_version pip \ + && pip install --no-cache-dir -U pip \ + && pip install --no-cache-dir -r requirements.txt \ + && conda clean -y --all --force-pkgs-dirs \ + # Beam workers looks for pip at /usr/local/bin/pip by default. + # This can be omitted in Beam 2.30.0 and later versions. + && ln -s $(which pip) /usr/local/bin/pip + +# Set the entrypoint to Apache Beam SDK worker launcher. +ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-workers/README.md b/dataflow/gpu-workers/README.md new file mode 100644 index 00000000000..a71f0da3e95 --- /dev/null +++ b/dataflow/gpu-workers/README.md @@ -0,0 +1,5 @@ +# Workers with GPUs + +[![Open in Cloud Shell](http://gstatic.com/cloudssh/images/open-btn.svg)](https://console.cloud.google.com/cloudshell/open?git_repo=https://github.com/GoogleCloudPlatform/python-docs-samples&page=editor&open_in_editor=dataflow/gpu-workers/README.md) + +📝 Tutorial: [Processing Landsat satellite images with GPUs](https://cloud.google.com/dataflow/docs/samples/satellite-images-gpus) diff --git a/dataflow/gpu-workers/cloudbuild.yaml b/dataflow/gpu-workers/cloudbuild.yaml new file mode 100644 index 00000000000..dec3d7aabb8 --- /dev/null +++ b/dataflow/gpu-workers/cloudbuild.yaml @@ -0,0 +1,35 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# To build the container image: +# PYTHON_VERSION=`python -c 'import platform; print(platform.python_version())'` +# gcloud builds submit --substitutions _PYTHON_VERSION=$PYTHON_VERSION . --timeout 20m + +steps: + # Build the container image with the Python version of our choice. 
+ - name: gcr.io/cloud-builders/docker + args: + [ 'build' + , '--build-arg=python_version=$_PYTHON_VERSION' + , '--tag=gcr.io/$PROJECT_ID/$_IMAGE' + , '.' + ] + + # Push the image to Container Registry. + - name: gcr.io/cloud-builders/docker + args: [ 'push', 'gcr.io/$PROJECT_ID/$_IMAGE' ] + +substitutions: + _PYTHON_VERSION: '3.8' + _IMAGE: samples/dataflow/tensorflow-gpu:latest diff --git a/dataflow/gpu-workers/e2e_test.py b/dataflow/gpu-workers/e2e_test.py new file mode 100644 index 00000000000..f3f105d7b6a --- /dev/null +++ b/dataflow/gpu-workers/e2e_test.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import platform +import subprocess +import uuid + +from google.cloud import storage +import pytest + +SUFFIX = uuid.uuid4().hex[0:6] +PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] +BUCKET_NAME = f"dataflow-gpu-test-{SUFFIX}" +IMAGE_NAME = f"dataflow/gpu-workers/test-{SUFFIX}:latest" +REGION = "us-central1" +ZONE = "us-central1-f" + + +@pytest.fixture(scope="session") +def bucket_name() -> str: + storage_client = storage.Client() + bucket = storage_client.create_bucket(BUCKET_NAME) + + yield BUCKET_NAME + + bucket.delete(force=True) + + +@pytest.fixture(scope="session") +def configure_docker() -> None: + subprocess.run( + [ + "gcloud", + "auth", + "configure-docker", + ] + ) + + +@pytest.fixture(scope="session") +def image_name(configure_docker: None) -> str: + # See the `cloudbuild.yaml` for the configuration for this build. + substitutions = { + "_PYTHON_VERSION": platform.python_version(), + "_IMAGE": IMAGE_NAME, + } + print(f"-- Cloud build substitutions: {substitutions}") + subprocess.run( + [ + "gcloud", + "builds", + "submit", + f"--project={PROJECT}", + f"--substitutions={','.join([k + '=' + v for k, v in substitutions.items()])}", + "--timeout=30m", + "--quiet", + ], + check=True, + ) + + yield f"gcr.io/{PROJECT}/{IMAGE_NAME}" + + # Delete the image when we're done. + subprocess.run( + [ + "gcloud", + "container", + "images", + "delete", + f"gcr.io/{PROJECT}/{IMAGE_NAME}", + f"--project={PROJECT}", + "--quiet", + ], + check=True, + ) + + +def test_end_to_end(bucket_name: str, image_name: str) -> None: + # Run the Beam pipeline in Dataflow making sure GPUs are used. + gpu_type = "nvidia-tesla-t4" + subprocess.run( + [ + "python", + "landsat_view.py", + f"--output-path-prefix=gs://{bucket_name}/outputs/", + "--runner=DataflowRunner", + f"--job_name=gpu-workers-{SUFFIX}", + f"--project={PROJECT}", + f"--region={REGION}", + f"--temp_location=gs://{bucket_name}/temp", + "--worker_machine_type=custom-1-13312-ext", + "--disk_size_gb=300", + f"--worker_harness_container_image={image_name}", + f"--worker_zone={ZONE}", + f"--experiments=worker_accelerator=type={gpu_type},count=1,install-nvidia-driver", + "--experiments=use_runner_v2", + ], + check=True, + ) + + # Check that output files were created and are not empty. 
+ storage_client = storage.Client() + output_files = list(storage_client.list_blobs(bucket_name, prefix="outputs/")) + assert len(output_files) > 0, "No output files found" + for output_file in output_files: + assert output_file.size > 0, f"Output file is empty: {output_file.name}" diff --git a/dataflow/gpu-workers/landsat_view.py b/dataflow/gpu-workers/landsat_view.py new file mode 100644 index 00000000000..9e61016eabf --- /dev/null +++ b/dataflow/gpu-workers/landsat_view.py @@ -0,0 +1,368 @@ +#!/usr/bin/env python + +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""This Apache Beam pipeline processes Landsat 8 satellite images and renders +them as JPEG files. + +A Landsat 8 image consists of 11 bands. Each band contains the data for a +specific range of the electromagnetic spectrum. + +A JPEG image consists of three channels: Red, Green, and Blue. For Landsat 8 +images, these correspond to Band 4 (red), Band 3 (green), and Band 2 (blue). + +These bands contain the raw pixel data directly from the satellite sensors. The +values in each band can go from 0 to unbounded positive values. For a JPEG image +we need to clamp them into integers between 0 and 255 for each channel. + +For this, we supply visualization parameters, commonly called `vis_params`. +These visualization parameters include: + +- The bands for the RGB cannels, typically [B4, B3, B2] for Landsat 8. +- The minimum value in each band, typically 0 for Landsat 8. +- The maximum value in each band, this varies depending on the light exposure. +- A gamma value for gamma correction. + +The Landsat data is read from the Landsat public dataset in Cloud Storage. +For more information on the Landsat dataset: + https://cloud.google.com/storage/docs/public-datasets/landsat + +The overall workflow of the pipeline is the following: + +- Parse one or more Landsat scene IDs from user-provided flags.. +- Get the Cloud Storage paths of all the RGB bands. +- Load the pixel values for each band from Cloud Storage. +- Preprocess pixels: clamp values and apply gamma correction. +- Create a JPEG image and save it to Cloud Storage. 
+""" + +import argparse +import logging +import os +import re +from typing import Any, Dict, List, Tuple + +import apache_beam as beam +from apache_beam.options.pipeline_options import PipelineOptions +from apache_beam.typehints.typehints import Optional +import numpy as np +from PIL import Image +import rasterio +import tensorflow as tf + +DEFAULT_RGB_BAND_NAMES = ["B4", "B3", "B2"] +DEFAULT_MIN_BAND_VALUE = 0.0 +DEFAULT_MAX_BAND_VALUE = 12000.0 +DEFAULT_GAMMA = 0.5 + +DEFAULT_SCENES = [ + "LC08_L1TP_001067_20200727_20200807_01_T1", # Brazil-Bolivia boundary + "LC08_L1TP_019024_20190621_20190704_01_T1", # Nottaway river delta, Quebec + "LC08_L1TP_019046_20191214_20191226_01_T1", # Yucatan peninsula + "LC08_L1TP_037035_20191212_20191212_01_T1", # Grand canyon, Arizona + "LC08_L1TP_045031_20200715_20200722_01_T1", # Mount Shasta, California + "LC08_L1TP_064011_20200618_20200625_01_T1", # Mackenzie river delta, Canada + "LC08_L1TP_073087_20200516_20200527_01_T1", # Mt. Taranaki, New Zealand + "LC08_L1TP_083074_20180805_20180814_01_T1", # Nouvelle-Calédonie + "LC08_L1TP_098063_20200703_20200708_01_T1", # Manam volcano, Papua New Guinea + "LC08_L1TP_109078_20200411_20200422_01_T1", # Lake Carnegie, West Australia + "LC08_L1TP_110036_20191009_20191018_01_T1", # Osaka 大阪市, Japan + "LC08_L1TP_115078_20200608_20200625_01_T1", # Sediment deposits, West Australia + "LC08_L1TP_119038_20191109_20191115_01_T1", # Lake Tai 太湖, China + "LC08_L1TP_135040_20190314_20190325_01_T1", # Arunachal Pradesh, India + "LC08_L1TP_137045_20200211_20200225_01_T1", # Ganges river delta, India + "LC08_L1TP_166075_20180608_20180615_01_T1", # Bazaruto island, Mozambique + "LC08_L1TP_169034_20200720_20200807_01_T1", # Lake Urmia دریاچه ارومیه, Iran + "LC08_L1TP_170059_20200101_20200113_01_T1", # Mount Elgon, Uganda + "LC08_L1TP_175079_20200511_20200526_01_T1", # Sand dunes, South Africa + "LC08_L1TP_178069_20200804_20200821_01_T1", # Angola + "LC08_L1TP_178078_20200804_20200821_01_T1", # Sand dunes, Namibia + "LC08_L1TP_191020_20200815_20200822_01_T1", # Phytoplankton at Gotland, Sweden + "LC08_L1TP_195028_20200116_20200127_01_T1", # Swiss Alps + "LC08_L1TP_203045_20200108_20200114_01_T1", # Eye of the Sahara, Mauritania + "LC08_L1TP_231094_20190906_20190917_01_T1", # Patagonia, South America +] + +SCENE_RE = re.compile( + r"(?PL[COTEM]0[78])_" + r"(?PL1TP|L1GT|L1GS)_" + r"(?P\d\d\d)" + r"(?P\d\d\d)_" + r"(?P\d\d\d\d)" + r"(?P\d\d)" + r"(?P\d\d)_" + r"(?P\d\d\d\d)" + r"(?P\d\d)" + r"(?P\d\d)_" + r"(?P\d\d)_" + r"(?PRT|T1|T2)" +) + + +def check_gpus(element: Any, gpus_optional: bool) -> Any: + """Makes sure TensorFlow detects GPUs, otherwise raise a RuntimeError. + + Note that this function must be run within a PTransform like beam.Map so + we are sure it's run by the workers, and not the launcher process. + + Args: + element: An element + gpus_optional: If True, the pipeline won't crash if GPUs are not found. + + Returns: + The same element it received as is. + + Raises: + RuntimeError: If no GPUs were found by TensorFlow. + """ + # Make sure we have a GPU available. + gpu_devices = tf.config.list_physical_devices("GPU") + logging.info(f"GPU devices: {gpu_devices}") + if len(gpu_devices) == 0: + if gpus_optional: + logging.warning("No GPUs found, defaulting to CPU.") + else: + raise RuntimeError("No GPUs found.") + return element + + +def get_band_paths( + scene: str, band_names: List[str], unused_side_input: Any +) -> Tuple[str, List[str]]: + """Gets the Cloud Storage paths for each band in a Landsat scene. 
+ + Args: + scene: Landsat 8 scene ID. + band_names: List of the band names corresponding to [Red, Green, Blue] channels. + unused_side_input: Used to wait for the GPU check, can be safely ignored. + + Returns: + A (scene, band_paths) pair. + + Raises: + ValueError: If the scene or a band does not exist. + """ + # Extract the metadata from the scene ID using a regular expression. + m = SCENE_RE.match(scene) + if not m: + raise ValueError(f"invalid scene ID: {scene}") + + g = m.groupdict() + scene_dir = f"gs://gcp-public-data-landsat/{g['sensor']}/{g['collection']}/{g['wrs_path']}/{g['wrs_row']}/{scene}" + + band_paths = [f"{scene_dir}/{scene}_{band_name}.TIF" for band_name in band_names] + + for band_path in band_paths: + if not tf.io.gfile.exists(band_path): + raise ValueError(f"failed to load: {band_path}") + + return scene, band_paths + + +def load_values(scene: str, band_paths: List[str]) -> Tuple[str, np.ndarray]: + """Loads a scene's bands data as a numpy array. + + Args: + scene: Landsat 8 scene ID. + band_paths: A list of the [Red, Green, Blue] band paths. + + Returns: + A (scene, values) pair. + + The values are stored in a three-dimensional float32 array with shape: + (band, width, height) + """ + + def read_band(band_path: str) -> np.array: + # Use rasterio to read the GeoTIFF values from the band files. + with tf.io.gfile.GFile(band_path, "rb") as f, rasterio.open(f) as data: + return data.read(1) + + logging.info(f"{scene}: load_values({band_paths})") + values = [read_band(band_path) for band_path in band_paths] + return scene, np.array(values, np.float32) + + +def preprocess_pixels( + scene: str, + values: np.ndarray, + min_value: float = 0.0, + max_value: float = 1.0, + gamma: float = 1.0, +) -> Tuple[str, tf.Tensor]: + """Prepares the band data into a pixel-ready format for an RGB image. + + The input band values come in the shape (band, width, height) with + unbounded positive numbers depending on the sensor's exposure. + The values are reshaped into (width, height, band), the values are clamped + to integers between 0 and 255, and a gamma correction value is applied. + + Args: + scene: Landsat 8 scene ID. + values: Band values in the shape (band, width, height). + min_value: Minimum band value. + max_value: Maximum band value. + gamma: Gamma correction value. + + Returns: + A (scene, pixels) pair. The pixels are Image-ready values. + """ + logging.info( + f"{scene}: preprocess_pixels({values.shape}:{values.dtype}, min={min_value}, max={max_value}, gamma={gamma})" + ) + + # Reshape (band, width, height) into (width, height, band). + pixels = tf.transpose(values, (1, 2, 0)) + + # Rescale to values from 0.0 to 1.0 and clamp them into that range. + pixels -= min_value + pixels /= max_value + pixels = tf.clip_by_value(pixels, 0.0, 1.0) + + # Apply gamma correction. + pixels **= 1.0 / gamma + + # Return the pixel values as int8 in the range from 0 to 255, + # which is what PIL.Image expects. + return scene, tf.cast(pixels * 255.0, dtype=tf.uint8) + + +def save_to_gcs( + scene: str, image: Image.Image, output_path_prefix: str, format: str = "JPEG" +) -> None: + """Saves a PIL.Image as a JPEG file in the desired path. + + Args: + scene: Landsat 8 scene ID. + image: A PIL.Image object. + output_path_prefix: Path prefix to save the output files. + format: Image format to save files. + """ + filename = os.path.join(output_path_prefix, scene + "." 
+ format.lower()) + with tf.io.gfile.GFile(filename, "w") as f: + image.save(f, format) + + +def run( + scenes: List[str], + output_path_prefix: str, + vis_params: Dict[str, Any], + gpus_optional: bool, + beam_args: Optional[List[str]] = None, +) -> None: + """Load multiple Landsat scenes and render them as JPEG files. + + Args: + scenes: List of Landsat 8 scene IDs. + output_path_prefix: Path prefix to save the output files. + vis_params: Visualization parameters including {rgb_bands, min, max, gamma}. + gpus_optional: If True, the pipeline won't crash if GPUs are not found. + beam_args: Optional list of arguments for Beam pipeline options. + """ + rgb_band_names = vis_params["rgb_band_names"] + min_value = vis_params["min"] + max_value = vis_params["max"] + gamma = vis_params["gamma"] + + options = PipelineOptions(beam_args, save_main_session=True) + with beam.Pipeline(options=options) as pipeline: + # Optionally, validate that the workers are using GPUs. + gpu_check = ( + pipeline + | beam.Create([None]) + | "Check GPU availability" >> beam.Map(check_gpus, gpus_optional) + ) + + # Convert Landsat 8 scenes into images. + # ℹ️ We pass `gpu_check` as an unused side input to force that step in + # the pipeline to wait for the check before continuing. + ( + pipeline + | "Create scene IDs" >> beam.Create(scenes) + | "Get RGB band paths" + >> beam.Map( + get_band_paths, + rgb_band_names, + unused_side_input=beam.pvalue.AsSingleton(gpu_check), + ) + | "Load RGB band values" >> beam.MapTuple(load_values) + | "Preprocess pixels" + >> beam.MapTuple(preprocess_pixels, min_value, max_value, gamma) + | "Convert to image" + >> beam.MapTuple( + lambda scene, rgb_pixels: ( + scene, + Image.fromarray(rgb_pixels.numpy(), mode="RGB"), + ) + ) + | "Save to Cloud Storage" >> beam.MapTuple(save_to_gcs, output_path_prefix) + ) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + + parser = argparse.ArgumentParser() + parser.add_argument( + "--output-path-prefix", + required=True, + help="Path prefix for output image files. " + "This can be a Google Cloud Storage path.", + ) + parser.add_argument( + "--scene", + dest="scenes", + action="append", + help="One or more Landsat scene IDs to process, for example " + "LC08_L1TP_109078_20200411_20200422_01_T1. " + "They must be in the format: " + "https://www.usgs.gov/faqs/what-naming-convention-landsat-collections-level-1-scenes", + ) + parser.add_argument( + "--rgb-band-names", + nargs=3, + default=DEFAULT_RGB_BAND_NAMES, + help="List of three band names to be mapped to the RGB channels.", + ) + parser.add_argument( + "--min", + type=float, + default=DEFAULT_MIN_BAND_VALUE, + help="Minimum value of the band value range.", + ) + parser.add_argument( + "--max", + type=float, + default=DEFAULT_MAX_BAND_VALUE, + help="Maximum value of the band value range.", + ) + parser.add_argument( + "--gamma", type=float, default=DEFAULT_GAMMA, help="Gamma correction factor." 
+ ) + parser.add_argument( + "--gpus-optional", + action="store_true", + help="If set, the pipeline won't crash if GPUs are not found.", + ) + args, beam_args = parser.parse_known_args() + + scenes = args.scenes or DEFAULT_SCENES + vis_params = { + "rgb_band_names": args.rgb_band_names, + "min": args.min, + "max": args.max, + "gamma": args.gamma, + } + run(scenes, args.output_path_prefix, vis_params, args.gpus_optional, beam_args) diff --git a/dataflow/gpu-workers/noxfile_config.py b/dataflow/gpu-workers/noxfile_config.py new file mode 100644 index 00000000000..74d736256c6 --- /dev/null +++ b/dataflow/gpu-workers/noxfile_config.py @@ -0,0 +1,38 @@ +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Default TEST_CONFIG_OVERRIDE for python repos. + +# You can copy this file into your directory, then it will be imported from +# the noxfile.py. + +# The source of truth: +# https://github.com/GoogleCloudPlatform/python-docs-samples/blob/master/noxfile_config.py + +TEST_CONFIG_OVERRIDE = { + # You can opt out from the test for specific Python versions. + "ignored_versions": ["2.7", "3.9"], + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + "enforce_type_hints": True, + # An envvar key for determining the project id to use. Change it + # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a + # build specific Cloud project. You can also use your own string + # to use your own Cloud project. + "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # A dictionary you want to inject into your test. Don't put any + # secrets here. These values will override predefined values. 
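
To make the visualization math in `preprocess_pixels` above concrete, here is a self-contained worked example with a tiny 2x2 "image"; the band values and vis params are made up.

```python
import numpy as np
import tensorflow as tf

# Three identical 2x2 bands, shaped (band, width, height) like the pipeline uses.
values = np.array([[[0.0, 6000.0], [12000.0, 24000.0]]] * 3, np.float32)

pixels = tf.transpose(values, (1, 2, 0))               # -> (width, height, band)
pixels = tf.clip_by_value(pixels / 12000.0, 0.0, 1.0)  # min=0, max=12000
pixels = pixels ** (1.0 / 0.5)                         # gamma = 0.5
pixels = tf.cast(pixels * 255.0, tf.uint8)             # Image-ready values

print(pixels.numpy()[..., 0])  # [[  0  63] [255 255]] for the first channel
```
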
+ "envs": {}, +} diff --git a/dataflow/gpu-workers/requirements-test.txt b/dataflow/gpu-workers/requirements-test.txt new file mode 100644 index 00000000000..9782f5d8d54 --- /dev/null +++ b/dataflow/gpu-workers/requirements-test.txt @@ -0,0 +1,2 @@ +google-cloud-storage==1.38.0 +pytest==6.2.4 diff --git a/dataflow/gpu-workers/requirements.txt b/dataflow/gpu-workers/requirements.txt new file mode 100644 index 00000000000..1823ef09b96 --- /dev/null +++ b/dataflow/gpu-workers/requirements.txt @@ -0,0 +1,4 @@ +Pillow==8.2.0 +apache-beam[gcp]==2.29.0 +rasterio==1.2.4 +tensorflow==2.5.0 From 5fc27b432ed7ef0a39b44e9fed564ab607ce171d Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 22:11:29 +0000 Subject: [PATCH 70/87] update copyright year --- dataflow/gpu-workers/tensorflow-landsat/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-workers/tensorflow-landsat/main.py index 6afa31522c3..408268dfa6d 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/main.py +++ b/dataflow/gpu-workers/tensorflow-landsat/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -# Copyright 2020 Google LLC +# Copyright 2021 Google LLC # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 1f005aeae33a4ee0502f1378f1a149838b5a0d0c Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Wed, 23 Jun 2021 22:17:41 +0000 Subject: [PATCH 71/87] fix lint issues --- dataflow/gpu-workers/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-workers/conftest.py b/dataflow/gpu-workers/conftest.py index 14c568548af..89b7ca2d6ea 100644 --- a/dataflow/gpu-workers/conftest.py +++ b/dataflow/gpu-workers/conftest.py @@ -16,8 +16,8 @@ import logging import multiprocessing as mp import os -import re import platform +import re import subprocess import sys import time @@ -325,8 +325,8 @@ def dataflow_jobs_wait( region: str = REGION, until_status: str = "JOB_STATE_DONE", timeout_sec: str = 30 * 60, - poll_interval_sec=60, - list_page_size=100, + poll_interval_sec: int = 60, + list_page_size: int = 100, ) -> Optional[str]: """For a list of all the valid states: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState From ea834469a7b7d81d6fe9385665652d06b8ca4d8e Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 24 Jun 2021 15:57:13 +0000 Subject: [PATCH 72/87] renamed test file --- dataflow/gpu-workers/{e2e_test.py => landsat_test.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename dataflow/gpu-workers/{e2e_test.py => landsat_test.py} (100%) diff --git a/dataflow/gpu-workers/e2e_test.py b/dataflow/gpu-workers/landsat_test.py similarity index 100% rename from dataflow/gpu-workers/e2e_test.py rename to dataflow/gpu-workers/landsat_test.py From 83622c09479e39b5e14cf9626915b267e355cd12 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 24 Jun 2021 15:57:56 +0000 Subject: [PATCH 73/87] update beam version --- dataflow/gpu-workers/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-workers/requirements.txt b/dataflow/gpu-workers/requirements.txt index 1823ef09b96..cbfaaf47f75 100644 --- a/dataflow/gpu-workers/requirements.txt +++ b/dataflow/gpu-workers/requirements.txt @@ -1,4 +1,4 @@ Pillow==8.2.0 -apache-beam[gcp]==2.29.0 +apache-beam[gcp]==2.30.0 rasterio==1.2.4 tensorflow==2.5.0 From 3765572eaf334242c11ef86392b850912c33d81d Mon Sep 17 
00:00:00 2001
From: David Cavazos <dcavazos@google.com>
Date: Thu, 24 Jun 2021 18:10:34 +0000
Subject: [PATCH 74/87] renamed test files

---
 dataflow/gpu-workers/{landsat_test.py => e2e_test.py}            | 0
 .../gpu-workers/pytorch-minimal/{e2e_test.py => pytorch_test.py} | 0
 .../tensorflow-landsat/{e2e_test.py => landsat_test.py}          | 0
 .../tensorflow-minimal/{e2e_test.py => tensorflow_test.py}       | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename dataflow/gpu-workers/{landsat_test.py => e2e_test.py} (100%)
 rename dataflow/gpu-workers/pytorch-minimal/{e2e_test.py => pytorch_test.py} (100%)
 rename dataflow/gpu-workers/tensorflow-landsat/{e2e_test.py => landsat_test.py} (100%)
 rename dataflow/gpu-workers/tensorflow-minimal/{e2e_test.py => tensorflow_test.py} (100%)

diff --git a/dataflow/gpu-workers/landsat_test.py b/dataflow/gpu-workers/e2e_test.py
similarity index 100%
rename from dataflow/gpu-workers/landsat_test.py
rename to dataflow/gpu-workers/e2e_test.py
diff --git a/dataflow/gpu-workers/pytorch-minimal/e2e_test.py b/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py
similarity index 100%
rename from dataflow/gpu-workers/pytorch-minimal/e2e_test.py
rename to dataflow/gpu-workers/pytorch-minimal/pytorch_test.py
diff --git a/dataflow/gpu-workers/tensorflow-landsat/e2e_test.py b/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py
similarity index 100%
rename from dataflow/gpu-workers/tensorflow-landsat/e2e_test.py
rename to dataflow/gpu-workers/tensorflow-landsat/landsat_test.py
diff --git a/dataflow/gpu-workers/tensorflow-minimal/e2e_test.py b/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py
similarity index 100%
rename from dataflow/gpu-workers/tensorflow-minimal/e2e_test.py
rename to dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py

From 16f207bec899de426c4bddb688aad1bad2ff91b8 Mon Sep 17 00:00:00 2001
From: David Cavazos <dcavazos@google.com>
Date: Thu, 24 Jun 2021 19:51:42 +0000
Subject: [PATCH 75/87] adjust to conftest behaving differently with tests in current directory

---
 dataflow/gpu-workers/conftest.py          | 33 +++++++++++--------
 .../pytorch-minimal/pytorch_test.py       |  4 +--
 .../tensorflow-landsat/landsat_test.py    |  4 +--
 .../tensorflow-minimal/tensorflow_test.py |  4 +--
 4 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/dataflow/gpu-workers/conftest.py b/dataflow/gpu-workers/conftest.py
index 89b7ca2d6ea..ed7387d209f 100644
--- a/dataflow/gpu-workers/conftest.py
+++ b/dataflow/gpu-workers/conftest.py
@@ -208,20 +208,25 @@ def cloud_build_submit(
             cmd_substitutions = []
 
         if config:
-            with open(config) as f:
-                cmd = [
-                    "gcloud",
-                    "builds",
-                    "submit",
-                    f"--project={project}",
-                    f"--config={config}",
-                    *cmd_substitutions,
-                    source,
-                ]
-                logging.info(f"{cmd}")
-                subprocess.run(cmd, check=True)
-                logging.info(f"Cloud build finished successfully: {config}")
-                yield f.read()
+            try:
+                with open(config) as f:
+                    cmd = [
+                        "gcloud",
+                        "builds",
+                        "submit",
+                        f"--project={project}",
+                        f"--config={config}",
+                        *cmd_substitutions,
+                        source,
+                    ]
+                    logging.info(f"{cmd}")
+                    subprocess.run(cmd, check=True)
+                    logging.info(f"Cloud build finished successfully: {config}")
+                    yield f.read()
+            except Exception as e:
+                logging.exception(e)
+                logging.warning(f'Current directory: {os.getcwd()}')
+                yield config
         elif image_name:
             cmd = [
                 "gcloud",
diff --git a/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py b/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py
index 52d6a2c7ab0..039f7b70b32 100644
--- a/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py
+++ b/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py
@@ -34,7 +34,7 @@ def bucket_name(utils: Utils) -> str:
 def build_image(utils: Utils) -> str:
     yield from utils.cloud_build_submit(
         image_name=NAME,
-        config="build.yaml",
+        config="pytorch-minimal/build.yaml",
         substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"},
     )
 
@@ -43,7 +43,7 @@ def build_image(utils: Utils) -> str:
 def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str:
     # Run the Beam pipeline in Dataflow making sure GPUs are used.
     yield from utils.cloud_build_submit(
-        config="run.yaml",
+        config="pytorch-minimal/run.yaml",
         substitutions={
             "_JOB_NAME": utils.hyphen_name(NAME),
             "_IMAGE": f"{NAME}:{utils.uuid}",
diff --git a/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py b/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py
index 972fe627f42..d6eab544cf9 100644
--- a/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py
+++ b/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py
@@ -35,7 +35,7 @@ def bucket_name(utils: Utils) -> str:
 def build_image(utils: Utils) -> str:
     yield from utils.cloud_build_submit(
         image_name=NAME,
-        config="build.yaml",
+        config="tensorflow-landsat/build.yaml",
         substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"},
     )
 
@@ -44,7 +44,7 @@ def build_image(utils: Utils) -> str:
 def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str:
     # Run the Beam pipeline in Dataflow making sure GPUs are used.
     yield from utils.cloud_build_submit(
-        config="run.yaml",
+        config="tensorflow-landsat/run.yaml",
         substitutions={
             "_JOB_NAME": utils.hyphen_name(NAME),
             "_IMAGE": f"{NAME}:{utils.uuid}",
diff --git a/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py b/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py
index 6c890550d68..dfd9236bad3 100644
--- a/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py
+++ b/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py
@@ -34,7 +34,7 @@ def bucket_name(utils: Utils) -> str:
 def build_image(utils: Utils) -> str:
     yield from utils.cloud_build_submit(
         image_name=NAME,
-        config="build.yaml",
+        config="tensorflow-minimal/build.yaml",
         substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"},
     )
 
@@ -43,7 +43,7 @@ def build_image(utils: Utils) -> str:
 def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str:
     # Run the Beam pipeline in Dataflow making sure GPUs are used.
yield from utils.cloud_build_submit( - config="run.yaml", + config="tensorflow-minimal/run.yaml", substitutions={ "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", From 51fdb72ffa591eb64f504542b2ac86d1b2583019 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 25 Jun 2021 15:48:42 +0000 Subject: [PATCH 76/87] moved new samples to another directory --- dataflow/{gpu-workers => gpu-examples}/conftest.py | 0 .../pytorch-minimal/.dockerignore | 0 .../pytorch-minimal/.gcloudignore | 0 .../pytorch-minimal/Dockerfile | 0 .../{gpu-workers => gpu-examples}/pytorch-minimal/README.md | 0 .../pytorch-minimal/build.yaml | 0 .../pytorch-minimal/e2e_test.py} | 6 +++--- .../{gpu-workers => gpu-examples}/pytorch-minimal/main.py | 0 .../pytorch-minimal/noxfile_config.py | 0 .../pytorch-minimal/requirements-test.txt | 0 .../pytorch-minimal/requirements.txt | 0 .../{gpu-workers => gpu-examples}/pytorch-minimal/run.yaml | 0 .../tensorflow-landsat/.dockerignore | 0 .../tensorflow-landsat/.gcloudignore | 0 .../tensorflow-landsat/Dockerfile | 0 .../tensorflow-landsat/README.md | 0 .../tensorflow-landsat/build.yaml | 0 .../tensorflow-landsat/e2e_test.py} | 6 +++--- .../tensorflow-landsat/main.py | 0 .../tensorflow-landsat/noxfile_config.py | 0 .../tensorflow-landsat/requirements-test.txt | 0 .../tensorflow-landsat/requirements.txt | 0 .../tensorflow-landsat/run.yaml | 0 .../tensorflow-minimal/.dockerignore | 0 .../tensorflow-minimal/.gcloudignore | 0 .../tensorflow-minimal/Dockerfile | 0 .../tensorflow-minimal/README.md | 0 .../tensorflow-minimal/build.yaml | 0 .../tensorflow-minimal/e2e_test.py} | 6 +++--- .../tensorflow-minimal/main.py | 0 .../tensorflow-minimal/noxfile_config.py | 0 .../tensorflow-minimal/requirements-test.txt | 0 .../tensorflow-minimal/requirements.txt | 0 .../tensorflow-minimal/run.yaml | 0 34 files changed, 9 insertions(+), 9 deletions(-) rename dataflow/{gpu-workers => gpu-examples}/conftest.py (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/.dockerignore (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/.gcloudignore (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/Dockerfile (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/README.md (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/build.yaml (100%) rename dataflow/{gpu-workers/pytorch-minimal/pytorch_test.py => gpu-examples/pytorch-minimal/e2e_test.py} (93%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/main.py (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/noxfile_config.py (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/requirements-test.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/requirements.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/pytorch-minimal/run.yaml (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/.dockerignore (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/.gcloudignore (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/Dockerfile (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/README.md (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/build.yaml (100%) rename dataflow/{gpu-workers/tensorflow-landsat/landsat_test.py => gpu-examples/tensorflow-landsat/e2e_test.py} (94%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/main.py (100%) rename dataflow/{gpu-workers => 
gpu-examples}/tensorflow-landsat/noxfile_config.py (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/requirements-test.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/requirements.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-landsat/run.yaml (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/.dockerignore (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/.gcloudignore (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/Dockerfile (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/README.md (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/build.yaml (100%) rename dataflow/{gpu-workers/tensorflow-minimal/tensorflow_test.py => gpu-examples/tensorflow-minimal/e2e_test.py} (93%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/main.py (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/noxfile_config.py (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/requirements-test.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/requirements.txt (100%) rename dataflow/{gpu-workers => gpu-examples}/tensorflow-minimal/run.yaml (100%) diff --git a/dataflow/gpu-workers/conftest.py b/dataflow/gpu-examples/conftest.py similarity index 100% rename from dataflow/gpu-workers/conftest.py rename to dataflow/gpu-examples/conftest.py diff --git a/dataflow/gpu-workers/pytorch-minimal/.dockerignore b/dataflow/gpu-examples/pytorch-minimal/.dockerignore similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/.dockerignore rename to dataflow/gpu-examples/pytorch-minimal/.dockerignore diff --git a/dataflow/gpu-workers/pytorch-minimal/.gcloudignore b/dataflow/gpu-examples/pytorch-minimal/.gcloudignore similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/.gcloudignore rename to dataflow/gpu-examples/pytorch-minimal/.gcloudignore diff --git a/dataflow/gpu-workers/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/Dockerfile rename to dataflow/gpu-examples/pytorch-minimal/Dockerfile diff --git a/dataflow/gpu-workers/pytorch-minimal/README.md b/dataflow/gpu-examples/pytorch-minimal/README.md similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/README.md rename to dataflow/gpu-examples/pytorch-minimal/README.md diff --git a/dataflow/gpu-workers/pytorch-minimal/build.yaml b/dataflow/gpu-examples/pytorch-minimal/build.yaml similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/build.yaml rename to dataflow/gpu-examples/pytorch-minimal/build.yaml diff --git a/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py b/dataflow/gpu-examples/pytorch-minimal/e2e_test.py similarity index 93% rename from dataflow/gpu-workers/pytorch-minimal/pytorch_test.py rename to dataflow/gpu-examples/pytorch-minimal/e2e_test.py index 039f7b70b32..41a127e4fee 100644 --- a/dataflow/gpu-workers/pytorch-minimal/pytorch_test.py +++ b/dataflow/gpu-examples/pytorch-minimal/e2e_test.py @@ -22,7 +22,7 @@ Utils = None import pytest -NAME = "dataflow/gpu-workers/pytorch-minimal" +NAME = "dataflow/gpu-examples/pytorch-minimal" @pytest.fixture(scope="session") @@ -34,7 +34,7 @@ def bucket_name(utils: Utils) -> str: def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( image_name=NAME, - config="pytorch-minimal/build.yaml", + 
config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -43,7 +43,7 @@ def build_image(utils: Utils) -> str: def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( - config="pytorch-minimal/run.yaml", + config="run.yaml", substitutions={ "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", diff --git a/dataflow/gpu-workers/pytorch-minimal/main.py b/dataflow/gpu-examples/pytorch-minimal/main.py similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/main.py rename to dataflow/gpu-examples/pytorch-minimal/main.py diff --git a/dataflow/gpu-workers/pytorch-minimal/noxfile_config.py b/dataflow/gpu-examples/pytorch-minimal/noxfile_config.py similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/noxfile_config.py rename to dataflow/gpu-examples/pytorch-minimal/noxfile_config.py diff --git a/dataflow/gpu-workers/pytorch-minimal/requirements-test.txt b/dataflow/gpu-examples/pytorch-minimal/requirements-test.txt similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/requirements-test.txt rename to dataflow/gpu-examples/pytorch-minimal/requirements-test.txt diff --git a/dataflow/gpu-workers/pytorch-minimal/requirements.txt b/dataflow/gpu-examples/pytorch-minimal/requirements.txt similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/requirements.txt rename to dataflow/gpu-examples/pytorch-minimal/requirements.txt diff --git a/dataflow/gpu-workers/pytorch-minimal/run.yaml b/dataflow/gpu-examples/pytorch-minimal/run.yaml similarity index 100% rename from dataflow/gpu-workers/pytorch-minimal/run.yaml rename to dataflow/gpu-examples/pytorch-minimal/run.yaml diff --git a/dataflow/gpu-workers/tensorflow-landsat/.dockerignore b/dataflow/gpu-examples/tensorflow-landsat/.dockerignore similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/.dockerignore rename to dataflow/gpu-examples/tensorflow-landsat/.dockerignore diff --git a/dataflow/gpu-workers/tensorflow-landsat/.gcloudignore b/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/.gcloudignore rename to dataflow/gpu-examples/tensorflow-landsat/.gcloudignore diff --git a/dataflow/gpu-workers/tensorflow-landsat/Dockerfile b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/Dockerfile rename to dataflow/gpu-examples/tensorflow-landsat/Dockerfile diff --git a/dataflow/gpu-workers/tensorflow-landsat/README.md b/dataflow/gpu-examples/tensorflow-landsat/README.md similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/README.md rename to dataflow/gpu-examples/tensorflow-landsat/README.md diff --git a/dataflow/gpu-workers/tensorflow-landsat/build.yaml b/dataflow/gpu-examples/tensorflow-landsat/build.yaml similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/build.yaml rename to dataflow/gpu-examples/tensorflow-landsat/build.yaml diff --git a/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py b/dataflow/gpu-examples/tensorflow-landsat/e2e_test.py similarity index 94% rename from dataflow/gpu-workers/tensorflow-landsat/landsat_test.py rename to dataflow/gpu-examples/tensorflow-landsat/e2e_test.py index d6eab544cf9..21de08de240 100644 --- a/dataflow/gpu-workers/tensorflow-landsat/landsat_test.py +++ 
b/dataflow/gpu-examples/tensorflow-landsat/e2e_test.py @@ -23,7 +23,7 @@ from google.cloud import storage import pytest -NAME = "dataflow/gpu-workers/tensorflow-landsat" +NAME = "dataflow/gpu-examples/tensorflow-landsat" @pytest.fixture(scope="session") @@ -35,7 +35,7 @@ def bucket_name(utils: Utils) -> str: def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( image_name=NAME, - config="tensorflow-landsat/build.yaml", + config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -44,7 +44,7 @@ def build_image(utils: Utils) -> str: def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( - config="tensorflow-landsat/run.yaml", + config="run.yaml", substitutions={ "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", diff --git a/dataflow/gpu-workers/tensorflow-landsat/main.py b/dataflow/gpu-examples/tensorflow-landsat/main.py similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/main.py rename to dataflow/gpu-examples/tensorflow-landsat/main.py diff --git a/dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py b/dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/noxfile_config.py rename to dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py diff --git a/dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt b/dataflow/gpu-examples/tensorflow-landsat/requirements-test.txt similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/requirements-test.txt rename to dataflow/gpu-examples/tensorflow-landsat/requirements-test.txt diff --git a/dataflow/gpu-workers/tensorflow-landsat/requirements.txt b/dataflow/gpu-examples/tensorflow-landsat/requirements.txt similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/requirements.txt rename to dataflow/gpu-examples/tensorflow-landsat/requirements.txt diff --git a/dataflow/gpu-workers/tensorflow-landsat/run.yaml b/dataflow/gpu-examples/tensorflow-landsat/run.yaml similarity index 100% rename from dataflow/gpu-workers/tensorflow-landsat/run.yaml rename to dataflow/gpu-examples/tensorflow-landsat/run.yaml diff --git a/dataflow/gpu-workers/tensorflow-minimal/.dockerignore b/dataflow/gpu-examples/tensorflow-minimal/.dockerignore similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/.dockerignore rename to dataflow/gpu-examples/tensorflow-minimal/.dockerignore diff --git a/dataflow/gpu-workers/tensorflow-minimal/.gcloudignore b/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/.gcloudignore rename to dataflow/gpu-examples/tensorflow-minimal/.gcloudignore diff --git a/dataflow/gpu-workers/tensorflow-minimal/Dockerfile b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/Dockerfile rename to dataflow/gpu-examples/tensorflow-minimal/Dockerfile diff --git a/dataflow/gpu-workers/tensorflow-minimal/README.md b/dataflow/gpu-examples/tensorflow-minimal/README.md similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/README.md rename to dataflow/gpu-examples/tensorflow-minimal/README.md diff --git a/dataflow/gpu-workers/tensorflow-minimal/build.yaml b/dataflow/gpu-examples/tensorflow-minimal/build.yaml similarity index 100% rename from 
dataflow/gpu-workers/tensorflow-minimal/build.yaml rename to dataflow/gpu-examples/tensorflow-minimal/build.yaml diff --git a/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py b/dataflow/gpu-examples/tensorflow-minimal/e2e_test.py similarity index 93% rename from dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py rename to dataflow/gpu-examples/tensorflow-minimal/e2e_test.py index dfd9236bad3..b78d9c49b88 100644 --- a/dataflow/gpu-workers/tensorflow-minimal/tensorflow_test.py +++ b/dataflow/gpu-examples/tensorflow-minimal/e2e_test.py @@ -22,7 +22,7 @@ Utils = None import pytest -NAME = "dataflow/gpu-workers/tensorflow-minimal" +NAME = "dataflow/gpu-examples/tensorflow-minimal" @pytest.fixture(scope="session") @@ -34,7 +34,7 @@ def bucket_name(utils: Utils) -> str: def build_image(utils: Utils) -> str: yield from utils.cloud_build_submit( image_name=NAME, - config="tensorflow-minimal/build.yaml", + config="build.yaml", substitutions={"_IMAGE": f"{NAME}:{utils.uuid}"}, ) @@ -43,7 +43,7 @@ def build_image(utils: Utils) -> str: def run_dataflow_job(utils: Utils, bucket_name: str, build_image: str) -> str: # Run the Beam pipeline in Dataflow making sure GPUs are used. yield from utils.cloud_build_submit( - config="tensorflow-minimal/run.yaml", + config="run.yaml", substitutions={ "_JOB_NAME": utils.hyphen_name(NAME), "_IMAGE": f"{NAME}:{utils.uuid}", diff --git a/dataflow/gpu-workers/tensorflow-minimal/main.py b/dataflow/gpu-examples/tensorflow-minimal/main.py similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/main.py rename to dataflow/gpu-examples/tensorflow-minimal/main.py diff --git a/dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py b/dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/noxfile_config.py rename to dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py diff --git a/dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt b/dataflow/gpu-examples/tensorflow-minimal/requirements-test.txt similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/requirements-test.txt rename to dataflow/gpu-examples/tensorflow-minimal/requirements-test.txt diff --git a/dataflow/gpu-workers/tensorflow-minimal/requirements.txt b/dataflow/gpu-examples/tensorflow-minimal/requirements.txt similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/requirements.txt rename to dataflow/gpu-examples/tensorflow-minimal/requirements.txt diff --git a/dataflow/gpu-workers/tensorflow-minimal/run.yaml b/dataflow/gpu-examples/tensorflow-minimal/run.yaml similarity index 100% rename from dataflow/gpu-workers/tensorflow-minimal/run.yaml rename to dataflow/gpu-examples/tensorflow-minimal/run.yaml From 50f3d44c43c1d19288d41fc63ccec018bce484d4 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 25 Jun 2021 18:22:18 +0000 Subject: [PATCH 77/87] simplified instructions --- dataflow/gpu-examples/pytorch-minimal/.dockerignore | 5 ----- dataflow/gpu-examples/pytorch-minimal/.gcloudignore | 5 ----- dataflow/gpu-examples/pytorch-minimal/README.md | 2 +- dataflow/gpu-examples/tensorflow-landsat/.dockerignore | 5 ----- dataflow/gpu-examples/tensorflow-landsat/.gcloudignore | 5 ----- dataflow/gpu-examples/tensorflow-landsat/README.md | 2 +- dataflow/gpu-examples/tensorflow-minimal/.dockerignore | 5 ----- dataflow/gpu-examples/tensorflow-minimal/.gcloudignore | 5 ----- dataflow/gpu-examples/tensorflow-minimal/README.md | 2 +- 9 files changed, 3 insertions(+), 
33 deletions(-) delete mode 100644 dataflow/gpu-examples/pytorch-minimal/.dockerignore delete mode 100644 dataflow/gpu-examples/pytorch-minimal/.gcloudignore delete mode 100644 dataflow/gpu-examples/tensorflow-landsat/.dockerignore delete mode 100644 dataflow/gpu-examples/tensorflow-landsat/.gcloudignore delete mode 100644 dataflow/gpu-examples/tensorflow-minimal/.dockerignore delete mode 100644 dataflow/gpu-examples/tensorflow-minimal/.gcloudignore diff --git a/dataflow/gpu-examples/pytorch-minimal/.dockerignore b/dataflow/gpu-examples/pytorch-minimal/.dockerignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/pytorch-minimal/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. -**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/pytorch-minimal/.gcloudignore b/dataflow/gpu-examples/pytorch-minimal/.gcloudignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/pytorch-minimal/.gcloudignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. -**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/pytorch-minimal/README.md b/dataflow/gpu-examples/pytorch-minimal/README.md index 43e24830529..cb638cad308 100644 --- a/dataflow/gpu-examples/pytorch-minimal/README.md +++ b/dataflow/gpu-examples/pytorch-minimal/README.md @@ -27,7 +27,7 @@ We use Cloud Build to run the [Dataflow](https://cloud.google.com/dataflow) job. export REGION="us-central1" export GPU_TYPE="nvidia-tesla-t4" -gcloud beta builds submit \ +gcloud builds submit \ --config run.yaml \ --substitutions _REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source diff --git a/dataflow/gpu-examples/tensorflow-landsat/.dockerignore b/dataflow/gpu-examples/tensorflow-landsat/.dockerignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/tensorflow-landsat/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. -**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore b/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. -**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/tensorflow-landsat/README.md b/dataflow/gpu-examples/tensorflow-landsat/README.md index dd5b8fadbc7..a89183c977c 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/README.md +++ b/dataflow/gpu-examples/tensorflow-landsat/README.md @@ -32,7 +32,7 @@ export OUTPUT_PATH="gs://$BUCKET/samples/dataflow/landsat/output-images/" export REGION="us-central1" export GPU_TYPE="nvidia-tesla-t4" -gcloud beta builds submit \ +gcloud builds submit \ --config run.yaml \ --substitutions _OUTPUT_PATH=$OUTPUT_PATH,_REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source diff --git a/dataflow/gpu-examples/tensorflow-minimal/.dockerignore b/dataflow/gpu-examples/tensorflow-minimal/.dockerignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/tensorflow-minimal/.dockerignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. 
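The README edits in this patch drop the `beta` release track: `gcloud builds submit` is available in the GA command surface, so the instructions no longer require the beta component. The same invocation is easy to script; below is a sketch mirroring the README's flags (a hypothetical wrapper, with defaults taken from the README's environment variables; `--no-source` is used because `run.yaml` only launches the Dataflow job and uploads nothing).

```python
import subprocess


def submit_run(region: str = "us-central1", gpu_type: str = "nvidia-tesla-t4") -> None:
    """Launch the sample's Dataflow job through Cloud Build."""
    subprocess.run(
        [
            "gcloud", "builds", "submit",
            "--config=run.yaml",
            f"--substitutions=_REGION={region},_GPU_TYPE={gpu_type}",
            "--no-source",  # the run config needs no local files
        ],
        check=True,
    )
```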
-**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore b/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore deleted file mode 100644 index 775d845fa58..00000000000 --- a/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore +++ /dev/null @@ -1,5 +0,0 @@ -# Ignore everything except the source files. -**/* -!Dockerfile -!requirements.txt -!*.py diff --git a/dataflow/gpu-examples/tensorflow-minimal/README.md b/dataflow/gpu-examples/tensorflow-minimal/README.md index debd86b0e91..9a457deeeba 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/README.md +++ b/dataflow/gpu-examples/tensorflow-minimal/README.md @@ -27,7 +27,7 @@ We use Cloud Build to run the [Dataflow](https://cloud.google.com/dataflow) job. export REGION="us-central1" export GPU_TYPE="nvidia-tesla-t4" -gcloud beta builds submit \ +gcloud builds submit \ --config run.yaml \ --substitutions _REGION=$REGION,_GPU_TYPE=$GPU_TYPE \ --no-source From 085feeb504b47b41e501d7397f875e6fcc93f24f Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Fri, 25 Jun 2021 19:06:22 +0000 Subject: [PATCH 78/87] add gcloudignore to reduce test time --- dataflow/gpu-examples/pytorch-minimal/.gcloudignore | 5 +++++ dataflow/gpu-examples/tensorflow-landsat/.gcloudignore | 5 +++++ dataflow/gpu-examples/tensorflow-minimal/.gcloudignore | 5 +++++ 3 files changed, 15 insertions(+) create mode 100644 dataflow/gpu-examples/pytorch-minimal/.gcloudignore create mode 100644 dataflow/gpu-examples/tensorflow-landsat/.gcloudignore create mode 100644 dataflow/gpu-examples/tensorflow-minimal/.gcloudignore diff --git a/dataflow/gpu-examples/pytorch-minimal/.gcloudignore b/dataflow/gpu-examples/pytorch-minimal/.gcloudignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-examples/pytorch-minimal/.gcloudignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore b/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-examples/tensorflow-landsat/.gcloudignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. +**/* +!Dockerfile +!requirements.txt +!*.py diff --git a/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore b/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore new file mode 100644 index 00000000000..775d845fa58 --- /dev/null +++ b/dataflow/gpu-examples/tensorflow-minimal/.gcloudignore @@ -0,0 +1,5 @@ +# Ignore everything except the source files. 
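These ignore files use gitignore-style wildmatch semantics: `**/*` first excludes everything, then each `!` pattern re-includes only the files the build actually needs, so `gcloud builds submit` uploads just the Dockerfile, the requirements file, and the Python sources, which is what cuts the test time. One way to sanity-check such rules is the third-party `pathspec` package; this is purely for illustration, the repo's tooling doesn't depend on it.

```python
import pathspec  # pip install pathspec

rules = [
    "**/*",              # ignore everything...
    "!Dockerfile",       # ...except what the image build needs
    "!requirements.txt",
    "!*.py",
]
spec = pathspec.PathSpec.from_lines("gitwildmatch", rules)

assert spec.match_file("README.md")       # ignored, so not uploaded
assert not spec.match_file("main.py")     # re-included by !*.py
assert not spec.match_file("Dockerfile")  # re-included explicitly
```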
+**/* +!Dockerfile +!requirements.txt +!*.py From dccd745695212eb6bf251d71620b5fb8cf0a59be Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 28 Jun 2021 18:21:59 +0000 Subject: [PATCH 79/87] reset gpu-workers entirely --- dataflow/gpu-workers/requirements.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/dataflow/gpu-workers/requirements.txt b/dataflow/gpu-workers/requirements.txt index 00058684873..6c5d4338609 100644 --- a/dataflow/gpu-workers/requirements.txt +++ b/dataflow/gpu-workers/requirements.txt @@ -1,9 +1,4 @@ Pillow==8.2.0 -<<<<<<< HEAD -apache-beam[gcp]==2.30.0 -rasterio==1.2.4 -======= apache-beam[gcp]==2.29.0 rasterio==1.2.6 ->>>>>>> d41f2ed158db07c062562d4cf93d626781466d9a tensorflow==2.5.0 From 7ccbdcabc6e90c14bb98b8c1adf547623b4fc9aa Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Thu, 1 Jul 2021 18:33:43 +0000 Subject: [PATCH 80/87] test in a single Python version --- dataflow/gpu-examples/pytorch-minimal/noxfile_config.py | 5 ++++- dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py | 5 ++++- dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py | 5 ++++- 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/noxfile_config.py b/dataflow/gpu-examples/pytorch-minimal/noxfile_config.py index d8e9aba4fdd..627ee2bc4b8 100644 --- a/dataflow/gpu-examples/pytorch-minimal/noxfile_config.py +++ b/dataflow/gpu-examples/pytorch-minimal/noxfile_config.py @@ -22,7 +22,10 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7", "3.9"], + # > ℹ️ We're opting out of all Python versions except 3.8. + # > The Python version used is defined by the Dockerfile, so it's redundant + # > to run multiple tests since they would all be running the same Dockerfile. + "ignored_versions": ["2.7", "3.6", "3.7", "3.9"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": True, diff --git a/dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py b/dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py index d8e9aba4fdd..627ee2bc4b8 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py +++ b/dataflow/gpu-examples/tensorflow-landsat/noxfile_config.py @@ -22,7 +22,10 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7", "3.9"], + # > ℹ️ We're opting out of all Python versions except 3.8. + # > The Python version used is defined by the Dockerfile, so it's redundant + # > to run multiple tests since they would all be running the same Dockerfile. + "ignored_versions": ["2.7", "3.6", "3.7", "3.9"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": True, diff --git a/dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py b/dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py index d8e9aba4fdd..627ee2bc4b8 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py +++ b/dataflow/gpu-examples/tensorflow-minimal/noxfile_config.py @@ -22,7 +22,10 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7", "3.9"], + # > ℹ️ We're opting out of all Python versions except 3.8. + # > The Python version used is defined by the Dockerfile, so it's redundant + # > to run multiple tests since they would all be running the same Dockerfile. 
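The three `noxfile_config.py` overrides now pin testing to Python 3.8 only: the worker's Python version is fixed by each sample's Dockerfile, so running the identical container build under several interpreter versions adds time without adding coverage. The shared nox harness consumes `ignored_versions` roughly as in the sketch below (an illustrative reduction; the real template lives elsewhere in the repo).

```python
ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8", "3.9"]

TEST_CONFIG_OVERRIDE = {
    "ignored_versions": ["2.7", "3.6", "3.7", "3.9"],
}


def versions_to_test(config: dict) -> list:
    """Keep only the interpreter versions not opted out by the sample."""
    ignored = set(config.get("ignored_versions", []))
    return [v for v in ALL_VERSIONS if v not in ignored]


assert versions_to_test(TEST_CONFIG_OVERRIDE) == ["3.8"]
```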
+ "ignored_versions": ["2.7", "3.6", "3.7", "3.9"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them "enforce_type_hints": True, From 5142c9ebdbb6f5a3a5cee715850dcc76cf83358e Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 12 Jul 2021 19:43:02 +0000 Subject: [PATCH 81/87] run subprocess as daemon --- dataflow/gpu-examples/conftest.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/dataflow/gpu-examples/conftest.py b/dataflow/gpu-examples/conftest.py index ed7387d209f..05e292fffca 100644 --- a/dataflow/gpu-examples/conftest.py +++ b/dataflow/gpu-examples/conftest.py @@ -31,7 +31,8 @@ PROJECT = os.environ["GOOGLE_CLOUD_PROJECT"] REGION = "us-central1" -RETRY_MAX_TIME = 5 * 60 # 5 minutes in seconds +TIMEOUT_SEC = 30 * 60 # 30 minutes in seconds +POLL_INTERVAL_SEC = 60 # 1 minute in seconds HYPHEN_NAME_RE = re.compile(r"[^\w\d-]+") UNDERSCORE_NAME_RE = re.compile(r"[^\w\d_]+") @@ -178,13 +179,20 @@ def _infinite_publish_job() -> None: # Start a subprocess in the background to do the publishing. logging.info(f"Starting publisher on {topic_path}") p = mp.Process(target=_infinite_publish_job) + + # We set the subprocess as a daemon so the main process doesn't wait for + # the subprocess to finish. Since this is an infinite loop, it will + # never finish, so it would cause the whole test to hang. + # Typically, `terminate` should stop the subprocess during the fixture + # cleanup phase, but we've had cases where the tests hang, most likely + # due to concurrency issues with pytest running in parallel. + p.daemon = True p.start() yield p.is_alive() # For cleanup, terminate the background process. logging.info("Stopping publisher") - p.join(timeout=0) p.terminate() @staticmethod @@ -225,7 +233,7 @@ def cloud_build_submit( yield f.read() except Exception as e: logging.exception(e) - logging.warning(f'Current directory: {os.getcwd()}') + logging.warning(f"Current directory: {os.getcwd()}") yield config elif image_name: cmd = [ @@ -329,9 +337,9 @@ def dataflow_jobs_wait( project: str = PROJECT, region: str = REGION, until_status: str = "JOB_STATE_DONE", - timeout_sec: str = 30 * 60, - poll_interval_sec: int = 60, list_page_size: int = 100, + timeout_sec: str = TIMEOUT_SEC, + poll_interval_sec: int = POLL_INTERVAL_SEC, ) -> Optional[str]: """For a list of all the valid states: https://cloud.google.com/dataflow/docs/reference/rest/v1b3/projects.jobs#Job.JobState From 7caca190c3fb95810c239fc636f90da3694471f4 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 12 Jul 2021 19:43:16 +0000 Subject: [PATCH 82/87] update beam version --- dataflow/gpu-examples/pytorch-minimal/Dockerfile | 2 +- dataflow/gpu-examples/pytorch-minimal/requirements.txt | 2 +- dataflow/gpu-examples/tensorflow-landsat/Dockerfile | 2 +- dataflow/gpu-examples/tensorflow-landsat/requirements.txt | 2 +- dataflow/gpu-examples/tensorflow-minimal/Dockerfile | 2 +- dataflow/gpu-examples/tensorflow-minimal/requirements.txt | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile index 94aa381e344..ce52169aa54 100644 --- a/dataflow/gpu-examples/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-examples/pytorch-minimal/Dockerfile @@ -17,7 +17,7 @@ FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. 
-COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ diff --git a/dataflow/gpu-examples/pytorch-minimal/requirements.txt b/dataflow/gpu-examples/pytorch-minimal/requirements.txt index ad5777b6ca6..b766a3c2a56 100644 --- a/dataflow/gpu-examples/pytorch-minimal/requirements.txt +++ b/dataflow/gpu-examples/pytorch-minimal/requirements.txt @@ -1,2 +1,2 @@ -apache-beam[gcp]==2.30.0 +apache-beam[gcp]==2.31.0 torch==1.8.1 diff --git a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile index 7a50a862756..7bf18507825 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile @@ -21,7 +21,7 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ diff --git a/dataflow/gpu-examples/tensorflow-landsat/requirements.txt b/dataflow/gpu-examples/tensorflow-landsat/requirements.txt index cbfaaf47f75..17a0da72dd9 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/requirements.txt +++ b/dataflow/gpu-examples/tensorflow-landsat/requirements.txt @@ -1,4 +1,4 @@ Pillow==8.2.0 -apache-beam[gcp]==2.30.0 +apache-beam[gcp]==2.31.0 rasterio==1.2.4 tensorflow==2.5.0 diff --git a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile index 48b4b390eeb..11bd4840cb5 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile @@ -21,7 +21,7 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline # Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.30.0 /opt/apache/beam /opt/apache/beam +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ diff --git a/dataflow/gpu-examples/tensorflow-minimal/requirements.txt b/dataflow/gpu-examples/tensorflow-minimal/requirements.txt index aa9e7e634f5..f9d374a91aa 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/requirements.txt +++ b/dataflow/gpu-examples/tensorflow-minimal/requirements.txt @@ -1,2 +1,2 @@ -apache-beam[gcp]==2.30.0 +apache-beam[gcp]==2.31.0 tensorflow==2.5.0 From e6876817fa77fde386c227396a6cb38b736e9446 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Mon, 12 Jul 2021 22:14:38 +0000 Subject: [PATCH 83/87] install g++ to compile google-cloud-profiler --- dataflow/gpu-examples/pytorch-minimal/Dockerfile | 12 ++++++++---- dataflow/gpu-examples/tensorflow-landsat/Dockerfile | 3 ++- dataflow/gpu-examples/tensorflow-minimal/Dockerfile | 3 ++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile index ce52169aa54..08cc9c6fe64 100644 --- a/dataflow/gpu-examples/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-examples/pytorch-minimal/Dockerfile @@ -21,10 +21,14 @@ COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ -# Install the pipeline requirements and check that there are no conflicts. 
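The Beam version bump touches two places per sample: the `apache/beam_python3.8_sdk` tag in the Dockerfile's `COPY --from` line and the `apache-beam[gcp]` pin in `requirements.txt`. The two are expected to move together, since the boot entrypoint copied from the SDK image launches whatever Beam version pip installed into the container, and a mismatch can surface as hard-to-debug worker failures. A small illustrative check (a hypothetical helper, not part of the repo) can guard that invariant:

```python
import re
from pathlib import Path


def beam_versions_in_sync(sample_dir: str) -> bool:
    """True if the requirements.txt pin matches the Dockerfile's SDK image tag."""
    reqs = Path(sample_dir, "requirements.txt").read_text()
    dockerfile = Path(sample_dir, "Dockerfile").read_text()
    pin = re.search(r"apache-beam\[gcp\]==([\d.]+)", reqs)
    tag = re.search(r"apache/beam_python3\.8_sdk:([\d.]+)", dockerfile)
    return bool(pin and tag and pin.group(1) == tag.group(1))


# Assumes the current directory is the repository root.
for sample in ("pytorch-minimal", "tensorflow-landsat", "tensorflow-minimal"):
    assert beam_versions_in_sync(f"dataflow/gpu-examples/{sample}")
```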
-# Since the image already has all the dependencies installed, -# there's no need to run with the --requirements_file option. -RUN pip install --no-cache-dir --upgrade pip \ +RUN apt-get update \ + # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. + && apt-get -y install g++ \ + && rm -rf /var/lib/apt/lists/* \ + # Install the pipeline requirements and check that there are no conflicts. + # Since the image already has all the dependencies installed, + # there's no need to run with the --requirements_file option. + && pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check diff --git a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile index 7bf18507825..e0298b63c2e 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile @@ -28,7 +28,8 @@ COPY *.py ./ # If you need a different Python version, consider: # https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa RUN apt-get update \ - && apt-get install -y curl python3.8 python3-distutils \ + # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. + && apt-get install -y curl g++ python3.8 python3-distutils \ && rm -rf /var/lib/apt/lists/* \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ diff --git a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile index 11bd4840cb5..b2598fdfee0 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile @@ -28,7 +28,8 @@ COPY *.py ./ # If you need a different Python version, consider: # https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa RUN apt-get update \ - && apt-get install -y curl python3.8 python3-distutils \ + # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. + && apt-get install -y curl g++ python3.8 python3-distutils \ && rm -rf /var/lib/apt/lists/* \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ From 9c11b461c74dc5edb7f7519e6ad5493c74f019a0 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 13 Jul 2021 00:30:51 +0000 Subject: [PATCH 84/87] fix build dependencies --- dataflow/gpu-examples/pytorch-minimal/Dockerfile | 8 +++----- dataflow/gpu-examples/pytorch-minimal/requirements.txt | 2 +- dataflow/gpu-examples/tensorflow-landsat/Dockerfile | 7 ++----- dataflow/gpu-examples/tensorflow-minimal/Dockerfile | 7 ++----- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile index 08cc9c6fe64..52427ced6a8 100644 --- a/dataflow/gpu-examples/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-examples/pytorch-minimal/Dockerfile @@ -12,18 +12,15 @@ # See the License for the specific language governing permissions and # limitations under the License. -FROM pytorch/pytorch:1.8.1-cuda11.1-cudnn8-runtime +FROM pytorch/pytorch:1.9.0-cuda11.1-cudnn8-runtime WORKDIR /pipeline -# Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ RUN apt-get update \ - # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. 
- && apt-get -y install g++ \ + && apt-get install -y --no-install-recommends g++ \ && rm -rf /var/lib/apt/lists/* \ # Install the pipeline requirements and check that there are no conflicts. # Since the image already has all the dependencies installed, @@ -33,4 +30,5 @@ RUN apt-get update \ && pip check # Set the entrypoint to Apache Beam SDK worker launcher. +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-examples/pytorch-minimal/requirements.txt b/dataflow/gpu-examples/pytorch-minimal/requirements.txt index b766a3c2a56..fbf5febe81a 100644 --- a/dataflow/gpu-examples/pytorch-minimal/requirements.txt +++ b/dataflow/gpu-examples/pytorch-minimal/requirements.txt @@ -1,2 +1,2 @@ apache-beam[gcp]==2.31.0 -torch==1.8.1 +torch==1.9.0 diff --git a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile index e0298b63c2e..85ea20c7d93 100644 --- a/dataflow/gpu-examples/tensorflow-landsat/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-landsat/Dockerfile @@ -20,25 +20,22 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline -# Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ # If you need a different Python version, consider: # https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa RUN apt-get update \ - # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. - && apt-get install -y curl g++ python3.8 python3-distutils \ + && apt-get install -y --no-install-recommends curl g++ python3.8-dev python3-distutils \ && rm -rf /var/lib/apt/lists/* \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ # Install the pipeline requirements and check that there are no conflicts. # Since the image already has all the dependencies installed, # there's no need to run with the --requirements_file option. - && pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check # Set the entrypoint to Apache Beam SDK worker launcher. +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam ENTRYPOINT [ "/opt/apache/beam/boot" ] diff --git a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile index b2598fdfee0..39c185782bd 100644 --- a/dataflow/gpu-examples/tensorflow-minimal/Dockerfile +++ b/dataflow/gpu-examples/tensorflow-minimal/Dockerfile @@ -20,25 +20,22 @@ FROM nvcr.io/nvidia/cuda:11.2.2-cudnn8-runtime-ubuntu20.04 WORKDIR /pipeline -# Copy the Apache Beam worker files and the pipeline source files. -COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam COPY requirements.txt . COPY *.py ./ # If you need a different Python version, consider: # https://launchpad.net/~deadsnakes/+archive/ubuntu/ppa RUN apt-get update \ - # Since Apache Beam 2.31.0, we need g++ to compile google-cloud-profiler. 
- && apt-get install -y curl g++ python3.8 python3-distutils \ + && apt-get install -y --no-install-recommends curl g++ python3.8-dev python3-distutils \ && rm -rf /var/lib/apt/lists/* \ && update-alternatives --install /usr/bin/python python /usr/bin/python3.8 10 \ && curl https://bootstrap.pypa.io/get-pip.py | python \ # Install the pipeline requirements and check that there are no conflicts. # Since the image already has all the dependencies installed, # there's no need to run with the --requirements_file option. - && pip install --no-cache-dir --upgrade pip \ && pip install --no-cache-dir -r requirements.txt \ && pip check # Set the entrypoint to Apache Beam SDK worker launcher. +COPY --from=apache/beam_python3.8_sdk:2.31.0 /opt/apache/beam /opt/apache/beam ENTRYPOINT [ "/opt/apache/beam/boot" ] From cb5b4dd020754feb606d45c04a704f9c7054dee8 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 13 Jul 2021 00:52:13 +0000 Subject: [PATCH 85/87] fix build dependencies --- dataflow/gpu-examples/pytorch-minimal/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile index 52427ced6a8..c59eb99a09e 100644 --- a/dataflow/gpu-examples/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-examples/pytorch-minimal/Dockerfile @@ -20,7 +20,7 @@ COPY requirements.txt . COPY *.py ./ RUN apt-get update \ - && apt-get install -y --no-install-recommends g++ \ + && apt-get install -y --no-install-recommends g++ python3-dev \ && rm -rf /var/lib/apt/lists/* \ # Install the pipeline requirements and check that there are no conflicts. # Since the image already has all the dependencies installed, From a503021009b5a2ba742d1ff54f5516c3ba020f62 Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 13 Jul 2021 00:56:26 +0000 Subject: [PATCH 86/87] fix build dependencies --- dataflow/gpu-examples/pytorch-minimal/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dataflow/gpu-examples/pytorch-minimal/Dockerfile b/dataflow/gpu-examples/pytorch-minimal/Dockerfile index c59eb99a09e..52427ced6a8 100644 --- a/dataflow/gpu-examples/pytorch-minimal/Dockerfile +++ b/dataflow/gpu-examples/pytorch-minimal/Dockerfile @@ -20,7 +20,7 @@ COPY requirements.txt . COPY *.py ./ RUN apt-get update \ - && apt-get install -y --no-install-recommends g++ python3-dev \ + && apt-get install -y --no-install-recommends g++ \ && rm -rf /var/lib/apt/lists/* \ # Install the pipeline requirements and check that there are no conflicts. # Since the image already has all the dependencies installed, From 0c8c2c15bca31c74c7839e9dc7993b7ea0aaf40c Mon Sep 17 00:00:00 2001 From: David Cavazos Date: Tue, 13 Jul 2021 00:58:27 +0000 Subject: [PATCH 87/87] adjust timeout --- dataflow/gpu-examples/pytorch-minimal/build.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dataflow/gpu-examples/pytorch-minimal/build.yaml b/dataflow/gpu-examples/pytorch-minimal/build.yaml index eed5c16aa70..24a5b57e772 100644 --- a/dataflow/gpu-examples/pytorch-minimal/build.yaml +++ b/dataflow/gpu-examples/pytorch-minimal/build.yaml @@ -30,3 +30,5 @@ images: [ gcr.io/$PROJECT_ID/$_IMAGE ] options: machineType: E2_HIGHCPU_8 + +timeout: 1200s