apache · jto · Jul 23, 2024 · Jul 23, 2024 · Aug 19, 2024 · Aug 20, 2024
diff --git a/.github/trigger_files/beam_PostCommit_Go_VR_Flink.json b/.github/trigger_files/beam_PostCommit_Go_VR_Flink.json
@@ -1,5 +1,6 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
   "modification": 1,
+  "https://github.com/apache/beam/pull/32440": "testing datastream optimizations",
   "https://github.com/apache/beam/pull/32648": "testing addition of Flink 1.19 support"
 }
diff --git a/.github/trigger_files/beam_PostCommit_Java_Examples_Flink.json b/.github/trigger_files/beam_PostCommit_Java_Examples_Flink.json
@@ -1,3 +1,4 @@
 {
-    "https://github.com/apache/beam/pull/32648": "testing flink 1.19 support"
+  "https://github.com/apache/beam/pull/32440": "testing datastream optimizations",
+  "https://github.com/apache/beam/pull/32648": "testing flink 1.19 support"
 }
diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Batch.json
@@ -1,4 +1,5 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
+  "https://github.com/apache/beam/pull/32440": "test new datastream runner for batch",
   "modification": 2
 }
diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Docker.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Docker.json
@@ -1,4 +1,6 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
-  "modification": "1"
+  "modification": "1",
+  "https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test",
+  "https://github.com/apache/beam/pull/32440": "test new datastream runner for batch"
 }
diff --git a/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json b/.github/trigger_files/beam_PostCommit_Java_PVR_Flink_Streaming.json
@@ -1,4 +1,5 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
-  "modification": 1
+  "modification": 1,
+  "https://github.com/apache/beam/pull/32440": "test new datastream runner for batch"
 }
diff --git a/.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Flink.json b/.github/trigger_files/beam_PostCommit_Java_ValidatesRunner_Flink.json
@@ -1,5 +1,6 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run",
   "https://github.com/apache/beam/pull/31156": "noting that PR #31156 should run this test",
+  "https://github.com/apache/beam/pull/32440": "testing datastream optimizations",
   "https://github.com/apache/beam/pull/32648": "testing addition of Flink 1.19 support"
 }
diff --git a/.github/trigger_files/beam_PostCommit_Python.json b/.github/trigger_files/beam_PostCommit_Python.json
@@ -1,5 +1,6 @@
 {
   "comment": "Modify this file in a trivial way to cause this test suite to run.",
-  "modification": 3
+  "modification": 4,
+  "https://github.com/apache/beam/pull/32440": "test new datastream runner for batch"
 }
 
diff --git a/.github/trigger_files/beam_PostCommit_XVR_Flink.json b/.github/trigger_files/beam_PostCommit_XVR_Flink.json
@@ -1,3 +1,4 @@
 {
-  "https://github.com/apache/beam/pull/32648": "testing Flink 1.19 support"
+  "https://github.com/apache/beam/pull/32440": "testing datastream optimizations",
+  "https://github.com/apache/beam/pull/32648": "testing addition of Flink 1.19 support"
 }
diff --git a/...java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetNewDoFn.java b/...java/src/main/java/org/apache/beam/runners/core/GroupAlsoByWindowViaWindowSetNewDoFn.java
@@ -18,6 +18,7 @@
 package org.apache.beam.runners.core;
 
 import java.util.Collection;
+import org.apache.beam.model.pipeline.v1.RunnerApi;
 import org.apache.beam.runners.core.triggers.ExecutableTriggerStateMachine;
 import org.apache.beam.runners.core.triggers.TriggerStateMachines;
 import org.apache.beam.sdk.transforms.DoFn;
@@ -41,6 +42,7 @@ public class GroupAlsoByWindowViaWindowSetNewDoFn<
     extends DoFn<RinT, KV<K, OutputT>> {
 
   private static final long serialVersionUID = 1L;
+  private final RunnerApi.Trigger triggerProto;
 
   public static <K, InputT, OutputT, W extends BoundedWindow>
       DoFn<KeyedWorkItem<K, InputT>, KV<K, OutputT>> create(
@@ -86,6 +88,7 @@ public GroupAlsoByWindowViaWindowSetNewDoFn(
     this.windowingStrategy = noWildcard;
     this.reduceFn = reduceFn;
     this.stateInternalsFactory = stateInternalsFactory;
+    this.triggerProto = TriggerTranslation.toProto(windowingStrategy.getTrigger());
   }
 
   private OutputWindowedValue<KV<K, OutputT>> outputWindowedValue() {
@@ -124,8 +127,7 @@ public void processElement(ProcessContext c) throws Exception {
             key,
             windowingStrategy,
             ExecutableTriggerStateMachine.create(
-                TriggerStateMachines.stateMachineForTrigger(
-                    TriggerTranslation.toProto(windowingStrategy.getTrigger()))),
+                TriggerStateMachines.stateMachineForTrigger(triggerProto)),
             stateInternals,
             timerInternals,
             outputWindowedValue(),

diff --git a/...17/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java b/...17/src/main/java/org/apache/beam/runners/flink/translation/types/CoderTypeSerializer.java
@@ -47,23 +47,21 @@ public class CoderTypeSerializer<T> extends TypeSerializer<T> {
 
   private final Coder<T> coder;
 
-  /**
-   * {@link SerializablePipelineOptions} deserialization will cause {@link
-   * org.apache.beam.sdk.io.FileSystems} registration needed for {@link
-   * org.apache.beam.sdk.transforms.Reshuffle} translation.
-   */
-  private final SerializablePipelineOptions pipelineOptions;
-
   private final boolean fasterCopy;
 
   public CoderTypeSerializer(Coder<T> coder, SerializablePipelineOptions pipelineOptions) {
+    this(
+        coder,
+        Preconditions.checkNotNull(pipelineOptions)
+            .get()
+            .as(FlinkPipelineOptions.class)
+            .getFasterCopy());
+  }
+
+  public CoderTypeSerializer(Coder<T> coder, boolean fasterCopy) {
     Preconditions.checkNotNull(coder);
-    Preconditions.checkNotNull(pipelineOptions);
     this.coder = coder;
-    this.pipelineOptions = pipelineOptions;
-
-    FlinkPipelineOptions options = pipelineOptions.get().as(FlinkPipelineOptions.class);
-    this.fasterCopy = options.getFasterCopy();
+    this.fasterCopy = fasterCopy;
   }
 
   @Override
@@ -73,7 +71,7 @@ public boolean isImmutableType() {
 
   @Override
   public CoderTypeSerializer<T> duplicate() {
-    return new CoderTypeSerializer<>(coder, pipelineOptions);
+    return new CoderTypeSerializer<>(coder, fasterCopy);
   }
 
   @Override

diff --git a/runners/flink/flink_runner.gradle b/runners/flink/flink_runner.gradle
@@ -236,6 +236,10 @@ class ValidatesRunnerConfig {
 def sickbayTests = [
         // TODO(https://github.com/apache/beam/issues/21306)
         'org.apache.beam.sdk.transforms.ParDoTest$TimestampTests.testOnWindowTimestampSkew',
+        // Flink errors are not deterministic. Exception may just be
+        // org.apache.flink.runtime.operators.coordination.TaskNotRunningException: Task is not running, but in state FAILED
+        // instead of the actual cause. Real cause is visible in the logs.
+        'org.apache.beam.sdk.transforms.ParDoTest$LifecycleTests'
 ]
 
 def createValidatesRunnerTask(Map m) {
@@ -249,7 +253,7 @@ def createValidatesRunnerTask(Map m) {
     def pipelineOptionsArray = ["--runner=TestFlinkRunner",
                                 "--streaming=${config.streaming}",
                                 "--useDataStreamForBatch=${config.useDataStreamForBatch}",
-                                "--parallelism=2",
+                                "--parallelism=1",
     ]
     if (config.checkpointing) {
       pipelineOptionsArray.addAll([
@@ -266,6 +270,8 @@ def createValidatesRunnerTask(Map m) {
     )
     // maxParallelForks decreased from 4 in order to avoid OOM errors
     maxParallelForks 2
+    def flinkConfDir = System.getProperty("user.dir") + "/runners/flink/src/test/validatesRunnerConfig"
+    environment("FLINK_CONF_DIR", flinkConfDir)
     useJUnit {
       if (config.checkpointing) {
         includeCategories 'org.apache.beam.sdk.testing.UsesBundleFinalizer'

diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkExecutionEnvironments.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkExecutionEnvironments.java
@@ -237,6 +237,16 @@ public static StreamExecutionEnvironment createStreamExecutionEnvironment(
     flinkStreamEnv.setParallelism(parallelism);
     if (options.getMaxParallelism() > 0) {
       flinkStreamEnv.setMaxParallelism(options.getMaxParallelism());
+    } else if (!options.isStreaming()) {
+      // In Flink maxParallelism defines the number of keyGroups.
+      // (see
+      // https://github.com/apache/flink/blob/e9dd4683f758b463d0b5ee18e49cecef6a70c5cf/flink-runtime/src/main/java/org/apache/flink/runtime/state/KeyGroupRangeAssignment.java#L76)
+      // The default value (parallelism * 1.5)
+      // (see
+      // https://github.com/apache/flink/blob/e9dd4683f758b463d0b5ee18e49cecef6a70c5cf/flink-runtime/src/main/java/org/apache/flink/runtime/state/KeyGroupRangeAssignment.java#L137-L147)
+      // creates a lot of skew, so we force maxParallelism = parallelism in Batch mode.
+      LOG.info("Setting maxParallelism to {}", parallelism);
+      flinkStreamEnv.setMaxParallelism(parallelism);
     }
     // set parallelism in the options (required by some execution code)
     options.setParallelism(parallelism);

diff --git a/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java b/runners/flink/src/main/java/org/apache/beam/runners/flink/FlinkPipelineOptions.java
@@ -262,7 +262,7 @@ public Long create(PipelineOptions options) {
       if (options.as(StreamingOptions.class).isStreaming()) {
         return 1000L;
       } else {
-        return 1000000L;
+        return 5000L;
       }
     }
   }
@@ -382,6 +382,13 @@ public Long create(PipelineOptions options) {
 
   void setEnableStableInputDrain(Boolean enableStableInputDrain);
 
+  @Description(
+      "Set a slot sharing group for all bounded sources. This is required when using Datastream to have the same scheduling behaviour as the Dataset API.")
+  @Default.Boolean(true)
+  Boolean getForceSlotSharingGroup();
+
+  void setForceSlotSharingGroup(Boolean forceSlotSharingGroup);
+
   static FlinkPipelineOptions defaults() {
     return PipelineOptionsFactory.as(FlinkPipelineOptions.class);
   }