-
Notifications
You must be signed in to change notification settings - Fork 4.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Upgrade transforms without upgrading the pipelines #28210
Changes from 5 commits
f3aa0ae
3edbd62
7a4271f
e589f03
6a85535
732bd32
3898c12
55b74b5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.beam.runners.core.construction; | ||
|
||
import java.util.List; | ||
import org.apache.beam.sdk.options.Default; | ||
import org.apache.beam.sdk.options.Description; | ||
import org.apache.beam.sdk.options.PipelineOptions; | ||
import org.apache.beam.sdk.transforms.resourcehints.ResourceHintsOptions.EmptyListDefault; | ||
|
||
public interface ExternalTranslationOptions extends PipelineOptions { | ||
|
||
@Description("Set of URNs of transforms to be overriden using the transform service.") | ||
@Default.InstanceFactory(EmptyListDefault.class) | ||
List<String> getTransformsToOverride(); | ||
|
||
void setTransformsToOverride(List<String> transformsToOverride); | ||
|
||
@Description("Address of an already available transform service.") | ||
String getTransformServiceAddress(); | ||
|
||
void setTransformServiceAddress(String transformServiceAddress); | ||
|
||
@Description("An available Beam version which will be used to start a transform service.") | ||
String getTransformServiceBeamVersion(); | ||
|
||
void setTransformServiceBeamVersion(String transformServiceBeamVersion); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.beam.runners.core.construction; | ||
|
||
import com.google.auto.service.AutoService; | ||
import org.apache.beam.sdk.annotations.Internal; | ||
import org.apache.beam.sdk.options.PipelineOptions; | ||
import org.apache.beam.sdk.options.PipelineOptionsRegistrar; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableList; | ||
|
||
/** A registrar for ExternalTranslationOptions. */ | ||
@AutoService(PipelineOptionsRegistrar.class) | ||
@Internal | ||
public class ExternalTranslationOptionsRegistrar implements PipelineOptionsRegistrar { | ||
@Override | ||
public Iterable<Class<? extends PipelineOptions>> getPipelineOptions() { | ||
return ImmutableList.<Class<? extends PipelineOptions>>builder() | ||
.add(ExternalTranslationOptions.class) | ||
.build(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,6 +21,7 @@ | |
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState; | ||
|
||
import java.io.IOException; | ||
import java.io.ObjectOutputStream; | ||
import java.util.Collection; | ||
import java.util.Collections; | ||
import java.util.Comparator; | ||
|
@@ -37,14 +38,18 @@ | |
import org.apache.beam.runners.core.construction.ExternalTranslation.ExternalTranslator; | ||
import org.apache.beam.runners.core.construction.ParDoTranslation.ParDoTranslator; | ||
import org.apache.beam.sdk.Pipeline; | ||
import org.apache.beam.sdk.coders.RowCoder; | ||
import org.apache.beam.sdk.io.Read; | ||
import org.apache.beam.sdk.runners.AppliedPTransform; | ||
import org.apache.beam.sdk.schemas.SchemaTranslation; | ||
import org.apache.beam.sdk.transforms.PTransform; | ||
import org.apache.beam.sdk.transforms.display.DisplayData; | ||
import org.apache.beam.sdk.util.ByteStringOutputStream; | ||
import org.apache.beam.sdk.util.common.ReflectHelpers.ObjectsClassComparator; | ||
import org.apache.beam.sdk.values.PCollection; | ||
import org.apache.beam.sdk.values.PInput; | ||
import org.apache.beam.sdk.values.POutput; | ||
import org.apache.beam.sdk.values.Row; | ||
import org.apache.beam.sdk.values.TupleTag; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Joiner; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap; | ||
|
@@ -54,6 +59,8 @@ | |
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Sets; | ||
import org.checkerframework.checker.nullness.qual.NonNull; | ||
import org.checkerframework.checker.nullness.qual.Nullable; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
/** | ||
* Utilities for converting {@link PTransform PTransforms} to {@link RunnerApi Runner API protocol | ||
|
@@ -65,10 +72,14 @@ | |
"keyfor" | ||
}) // TODO(https://github.com/apache/beam/issues/20497) | ||
public class PTransformTranslation { | ||
|
||
private static final Logger LOG = LoggerFactory.getLogger(PTransformTranslation.class); | ||
|
||
// We specifically copy the values here so that they can be used in switch case statements | ||
// and we validate that the value matches the actual URN in the static block below. | ||
|
||
// Primitives | ||
public static final String CREATE_TRANSFORM_URN = "beam:transform:create:v1"; | ||
public static final String PAR_DO_TRANSFORM_URN = "beam:transform:pardo:v1"; | ||
public static final String FLATTEN_TRANSFORM_URN = "beam:transform:flatten:v1"; | ||
public static final String GROUP_BY_KEY_TRANSFORM_URN = "beam:transform:group_by_key:v1"; | ||
|
@@ -83,6 +94,10 @@ public class PTransformTranslation { | |
public static final ImmutableSet<String> RUNNER_IMPLEMENTED_TRANSFORMS = | ||
ImmutableSet.of(GROUP_BY_KEY_TRANSFORM_URN, IMPULSE_TRANSFORM_URN); | ||
|
||
public static final String CONFIG_ROW_KEY = "config_row"; | ||
|
||
public static final String CONFIG_ROW_SCHEMA_KEY = "config_row_schema"; | ||
|
||
// DeprecatedPrimitives | ||
/** | ||
* @deprecated SDKs should move away from creating `Read` transforms and migrate to using Impulse | ||
|
@@ -435,10 +450,9 @@ public RunnerApi.PTransform translate( | |
RunnerApi.PTransform.Builder transformBuilder = | ||
translateAppliedPTransform(appliedPTransform, subtransforms, components); | ||
|
||
FunctionSpec spec = | ||
KNOWN_PAYLOAD_TRANSLATORS | ||
.get(appliedPTransform.getTransform().getClass()) | ||
.translate(appliedPTransform, components); | ||
TransformPayloadTranslator payloadTranslator = | ||
KNOWN_PAYLOAD_TRANSLATORS.get(appliedPTransform.getTransform().getClass()); | ||
FunctionSpec spec = payloadTranslator.translate(appliedPTransform, components); | ||
if (spec != null) { | ||
transformBuilder.setSpec(spec); | ||
|
||
|
@@ -461,6 +475,38 @@ public RunnerApi.PTransform translate( | |
} | ||
} | ||
} | ||
|
||
Row configRow = null; | ||
try { | ||
configRow = payloadTranslator.toConfigRow(appliedPTransform.getTransform()); | ||
} catch (UnsupportedOperationException e) { | ||
// Optional toConfigRow() has not been implemented. We can just ignore. | ||
} catch (Exception e) { | ||
LOG.warn( | ||
"Could not attach the config row for transform " | ||
+ appliedPTransform.getTransform().getName() | ||
+ ": " | ||
+ e); | ||
// Ignoring the error and continuing with the translation since attaching config rows is | ||
// optional. | ||
} | ||
if (configRow != null) { | ||
ByteStringOutputStream rowOutputStream = new ByteStringOutputStream(); | ||
try { | ||
RowCoder.of(configRow.getSchema()).encode(configRow, rowOutputStream); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
transformBuilder.putAnnotations(CONFIG_ROW_KEY, rowOutputStream.toByteString()); | ||
|
||
ByteStringOutputStream schemaOutputStream = new ByteStringOutputStream(); | ||
try (ObjectOutputStream schemaObjOut = new ObjectOutputStream(schemaOutputStream)) { | ||
schemaObjOut.writeObject(SchemaTranslation.schemaToProto(configRow.getSchema(), true)); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. SchemaTranslation.schemaToProto is a Proto object. Just use There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Updated. |
||
schemaObjOut.flush(); | ||
transformBuilder.putAnnotations(CONFIG_ROW_SCHEMA_KEY, schemaOutputStream.toByteString()); | ||
} | ||
} | ||
|
||
return transformBuilder.build(); | ||
} | ||
} | ||
|
@@ -508,14 +554,63 @@ static RunnerApi.PTransform.Builder translateAppliedPTransform( | |
* | ||
* <p>When going to a protocol buffer message, the translator produces a payload corresponding to | ||
* the Java representation while registering components that payload references. | ||
* | ||
* <p>Also, provides methods for generating a Row-based constructor config for the transform that | ||
* can be later used to re-construct the transform. | ||
*/ | ||
public interface TransformPayloadTranslator<T extends PTransform<?, ?>> { | ||
String getUrn(T transform); | ||
|
||
/** | ||
* Provides a unique URN for transforms represented by this {@code TransformPayloadTranslator}. | ||
*/ | ||
String getUrn(); | ||
|
||
/** | ||
* Same as {@link #getUrn()} but the returned URN may depend on the transform provided. | ||
* | ||
* <p>Only override this if the same {@code TransformPayloadTranslator} is used for multiple | ||
* transforms. Otherwise, use {@link #getUrn()}. | ||
*/ | ||
default String getUrn(T transform) { | ||
return getUrn(); | ||
} | ||
|
||
/** */ | ||
/** | ||
* Translates the given transform represented by the provided {@code AppliedPTransform} to a | ||
* {@code FunctionSpec} with a URN and a payload. | ||
* | ||
* @param application an {@code AppliedPTransform} that includes the transform to be expanded. | ||
* @param components components of the pipeline that includes the transform. | ||
* @return a generated spec for the transform to be included in the pipeline proto. If return | ||
* value is null, transform should include an empty spec. | ||
* @throws IOException | ||
*/ | ||
@Nullable | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Document what a null return value means. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. |
||
FunctionSpec translate(AppliedPTransform<?, ?, T> application, SdkComponents components) | ||
throws IOException; | ||
|
||
/** | ||
* Generates a Row-based construction configuration for the provided transform. | ||
* | ||
* @param transform a transform represented by the current {@code TransformPayloadTranslator}. | ||
* @return a {@code Row} holding the construction configuration of the provided transform. | ||
*/ | ||
default Row toConfigRow(T transform) { | ||
throw new UnsupportedOperationException("Not implemented"); | ||
} | ||
|
||
/** | ||
* Constructs a transform from a provided Row-based construction configuration. | ||
* | ||
* @param configRow a construction configuration similar to what would be generated by the | ||
* {@link #toConfigRow(PTransform)} method. | ||
* @return a transform represented by the current {@code TransformPayloadTranslator}. | ||
*/ | ||
default T fromConfigRow(Row configRow) { | ||
throw new UnsupportedOperationException("Not implemented"); | ||
} | ||
|
||
/** | ||
* A {@link TransformPayloadTranslator} for transforms that contain no references to components, | ||
* so they do not need a specialized rehydration. | ||
|
@@ -526,7 +621,7 @@ abstract class NotSerializable<T extends PTransform<?, ?>> | |
public static NotSerializable<?> forUrn(final String urn) { | ||
return new NotSerializable<PTransform<?, ?>>() { | ||
@Override | ||
public String getUrn(PTransform<?, ?> transform) { | ||
public String getUrn() { | ||
return urn; | ||
} | ||
}; | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think there's a CoderUtils that does this for you.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated.