-
Notifications
You must be signed in to change notification settings - Fork 4.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #32445 Add various utility meta-transforms to Beam.
- Loading branch information
Showing
13 changed files
with
490 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
91 changes: 91 additions & 0 deletions
91
sdks/java/core/src/main/java/org/apache/beam/sdk/transforms/Tee.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,91 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.beam.sdk.transforms; | ||
|
||
import java.util.function.Consumer; | ||
import org.apache.beam.sdk.values.PCollection; | ||
import org.apache.beam.sdk.values.PCollectionTuple; | ||
|
||
/** | ||
* A PTransform that returns its input, but also applies its input to an auxiliary PTransform, akin | ||
* to the shell {@code tee} command, which is named after the T-splitter used in plumbing. | ||
* | ||
* <p>This can be useful to write out or otherwise process an intermediate result without | ||
* breaking the linear flow of a chain of transforms, e.g. | ||
* | ||
* <pre><code> | ||
* {@literal PCollection<T>} input = ... ; | ||
* {@literal PCollection<T>} result = | ||
* {@literal input.apply(...)} | ||
* ... | ||
* {@literal input.apply(Tee.of(someSideTransform))} | ||
* ... | ||
* {@literal input.apply(...)}; | ||
* </code></pre> | ||
* | ||
* <p>The output of this transform is the very same {@code PCollection} it was given; whatever the | ||
* auxiliary transform produces is discarded. | ||
* | ||
* @param <T> the element type of the input PCollection | ||
*/ | ||
public class Tee<T> extends PTransform<PCollection<T>, PCollection<T>> { | ||
// The auxiliary transform applied to the input; its output type is irrelevant, hence the wildcard. | ||
private final PTransform<PCollection<T>, ?> consumer; | ||
|
||
/** | ||
* Returns a new Tee PTransform that will apply an auxiliary transform to the input as well as pass | ||
* it on. | ||
* | ||
* @param consumer An additional PTransform that should process the input PCollection. Its output | ||
* will be ignored. | ||
* @param <T> the type of the elements in the input {@code PCollection}. | ||
* @return a {@code Tee} wrapping the given transform. | ||
*/ | ||
public static <T> Tee<T> of(PTransform<PCollection<T>, ?> consumer) { | ||
return new Tee<>(consumer); | ||
} | ||
|
||
/** | ||
* Returns a new Tee PTransform that will apply an auxiliary transform to the input as well as pass | ||
* it on. | ||
* | ||
* @param consumer An arbitrary {@link Consumer} that will be wrapped in a PTransform and applied | ||
* to the input. Its output will be ignored. | ||
* @param <T> the type of the elements in the input {@code PCollection}. | ||
* @return a {@code Tee} whose auxiliary transform invokes the given consumer on the input. | ||
*/ | ||
public static <T> Tee<T> of(Consumer<PCollection<T>> consumer) { | ||
// Adapt the Consumer to a PTransform and delegate to the PTransform-based factory. | ||
return of( | ||
new PTransform<PCollection<T>, PCollectionTuple>() { | ||
@Override | ||
public PCollectionTuple expand(PCollection<T> input) { | ||
consumer.accept(input); | ||
// A Consumer yields no output, so an empty tuple stands in as this wrapper's result. | ||
return PCollectionTuple.empty(input.getPipeline()); | ||
} | ||
}); | ||
} | ||
|
||
// Private: instances are obtained through the of(...) factory methods. | ||
private Tee(PTransform<PCollection<T>, ?> consumer) { | ||
this.consumer = consumer; | ||
} | ||
|
||
// Applies the auxiliary transform to the input, ignores what it returns, and hands the | ||
// unchanged input back to the caller. | ||
@Override | ||
public PCollection<T> expand(PCollection<T> input) { | ||
input.apply(consumer); | ||
return input; | ||
} | ||
|
||
// Builds the kind string as "Tee(" + the auxiliary transform's name + ")". | ||
@Override | ||
protected String getKindString() { | ||
return "Tee(" + consumer.getName() + ")"; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
84 changes: 84 additions & 0 deletions
84
sdks/java/core/src/test/java/org/apache/beam/sdk/transforms/TeeTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
package org.apache.beam.sdk.transforms; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.Matchers.containsInAnyOrder; | ||
|
||
import java.util.Arrays; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.UUID; | ||
import org.apache.beam.sdk.testing.NeedsRunner; | ||
import org.apache.beam.sdk.testing.PAssert; | ||
import org.apache.beam.sdk.testing.TestPipeline; | ||
import org.apache.beam.sdk.values.PCollection; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.HashMultimap; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimap; | ||
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.Multimaps; | ||
import org.junit.Rule; | ||
import org.junit.Test; | ||
import org.junit.experimental.categories.Category; | ||
import org.junit.runner.RunWith; | ||
import org.junit.runners.JUnit4; | ||
|
||
/** Tests for {@link Tee}. */ | ||
@RunWith(JUnit4.class) | ||
public class TeeTest { | ||
|
||
@Rule public final transient TestPipeline p = TestPipeline.create(); | ||
|
||
// Checks both halves of the Tee contract: the main output still carries every input element, | ||
// and the auxiliary transform observed every input element as well. | ||
@Test | ||
@Category(NeedsRunner.class) | ||
public void testTee() { | ||
List<String> elements = Arrays.asList("a", "b", "c"); | ||
CollectToMemory<String> collector = new CollectToMemory<>(); | ||
PCollection<String> output = p.apply(Create.of(elements)).apply(Tee.of(collector)); | ||
|
||
PAssert.that(output).containsInAnyOrder(elements); | ||
p.run().waitUntilFinish(); | ||
|
||
// Here we assert that this "sink" had the correct side effects. | ||
assertThat(collector.get(), containsInAnyOrder(elements.toArray(new String[3]))); | ||
} | ||
|
||
// Test-only "sink" transform: records every element it processes in a static in-memory | ||
// multimap so the test can inspect them after the pipeline has run. | ||
private static class CollectToMemory<T> extends PTransform<PCollection<T>, PCollection<Void>> { | ||
|
||
// Shared by all instances; entries are keyed by each instance's uuid to keep them apart. | ||
// NOTE(review): presumably static because the runner may work on serialized copies of the | ||
// DoFn, so instance fields would not be visible to the test afterwards - confirm. | ||
private static final Multimap<UUID, Object> ALL_ELEMENTS = | ||
Multimaps.synchronizedMultimap(HashMultimap.<UUID, Object>create()); | ||
|
||
// Key under which this instance's elements are stored in ALL_ELEMENTS. | ||
UUID uuid = UUID.randomUUID(); | ||
|
||
// Applies a ParDo that stores each element under this instance's uuid and emits nothing. | ||
@Override | ||
public PCollection<Void> expand(PCollection<T> input) { | ||
return input.apply( | ||
ParDo.of( | ||
new DoFn<T, Void>() { | ||
@ProcessElement | ||
public void processElement(ProcessContext c) { | ||
ALL_ELEMENTS.put(uuid, c.element()); | ||
} | ||
})); | ||
} | ||
|
||
// Returns the elements recorded under this instance's uuid. The cast is unchecked because | ||
// the shared map stores values as Object; only elements of type T are ever put under this key. | ||
@SuppressWarnings("unchecked") | ||
public Collection<T> get() { | ||
return (Collection<T>) ALL_ELEMENTS.get(uuid); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.