nemo-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] johnyangk commented on a change in pull request #123: [NEMO-129] Support Beam's WindowedWordCount example
Date Sat, 13 Oct 2018 08:29:37 GMT
johnyangk commented on a change in pull request #123: [NEMO-129] Support Beam's WindowedWordCount example
URL: https://github.com/apache/incubator-nemo/pull/123#discussion_r224952962
 
 

 ##########
 File path: compiler/frontend/beam/src/main/java/org/apache/nemo/compiler/frontend/beam/transform/GroupByKeyTransform.java
 ##########
 @@ -26,53 +32,172 @@
 
 /**
  * Group Beam KVs.
- * @param <I> input type.
+ * @param <K> key type.
+ * @param <InputT> input type.
  */
-public final class GroupByKeyTransform<I> implements Transform<I, WindowedValue<KV<Object,
List>>> {
+public final class GroupByKeyTransform<K, InputT>
+    extends AbstractTransform<KV<K, InputT>, KeyedWorkItem<K, InputT>, KV<K,
Iterable<InputT>>> {
   private static final Logger LOG = LoggerFactory.getLogger(GroupByKeyTransform.class.getName());
-  private final Map<Object, List> keyToValues;
-  private OutputCollector<WindowedValue<KV<Object, List>>> outputCollector;
+
+  private final SystemReduceFn reduceFn;
+  private transient TimerInternalsFactory timerInternalsFactory;
 
   /**
    * GroupByKey constructor.
    */
-  public GroupByKeyTransform() {
-    this.keyToValues = new HashMap<>();
+  public GroupByKeyTransform(final Map<TupleTag<?>, Coder<?>> outputCoders,
+                             final TupleTag<KV<K, Iterable<InputT>>> mainOutputTag,
+                             final List<TupleTag<?>> additionalOutputTags,
+                             final WindowingStrategy<?, ?> windowingStrategy,
+                             final Collection<PCollectionView<?>> sideInputs,
+                             final PipelineOptions options,
+                             final SystemReduceFn reduceFn) {
+    super(null, /* doFn */
+      null, /* inputCoder */
+      outputCoders,
+      mainOutputTag,
+      additionalOutputTags,
+      windowingStrategy,
+      sideInputs,
+      options);
+    this.reduceFn = reduceFn;
   }
 
+  /**
+   * This creates a new DoFn that groups elements by key and window.
+   * @param doFn original doFn.
+   * @return GroupAlsoByWindowViaWindowSetNewDoFn
+   */
   @Override
-  public void prepare(final Context context, final OutputCollector<WindowedValue<KV<Object,
List>>> oc) {
-    this.outputCollector = oc;
+  protected DoFn wrapDoFn(final DoFn doFn) {
+    timerInternalsFactory = new InMemoryTimerInternalsFactory();
+    return
+      GroupAlsoByWindowViaWindowSetNewDoFn.create(
+        getWindowingStrategy(),
+        new InMemoryStateInternalsFactory(),
+        timerInternalsFactory,
+        getSideInputReader(),
+        reduceFn,
+        getOutputManager(),
+        getMainOutputTag());
   }
 
   @Override
-  public void onData(final I element) {
-    // TODO #129: support window in group by key for windowed groupByKey
-    final WindowedValue<KV> windowedValue = (WindowedValue<KV>) element;
-    final KV kv = windowedValue.getValue();
-    keyToValues.putIfAbsent(kv.getKey(), new ArrayList());
-    keyToValues.get(kv.getKey()).add(kv.getValue());
+  public void onData(final WindowedValue<KV<K, InputT>> element) {
+    // The GroupAlsoByWindowViaWindowSetNewDoFn requires KeyedWorkItem,
+    // so we convert the KV to KeyedWorkItem
+    final KV<K, InputT> kv = element.getValue();
+    final KeyedWorkItem<K, InputT> keyedWorkItem =
+      KeyedWorkItems.elementsWorkItem(kv.getKey(),
+        Collections.singletonList(element.withValue(kv.getValue())));
+
+    getDoFnRunner().processElement(WindowedValue.valueInGlobalWindow(keyedWorkItem));
   }
 
+  /**
+   * This advances the input watermark and processing time to the timestamp max value
+   * in order to emit all data.
+   */
   @Override
-  public void close() {
-    // TODO #129: support window in group by key for windowed groupByKey
-    if (keyToValues.isEmpty()) {
-      LOG.warn("Beam GroupByKeyTransform received no data!");
-    } else {
-      keyToValues.entrySet().stream().map(entry ->
-        WindowedValue.valueInGlobalWindow(KV.of(entry.getKey(), entry.getValue())))
-          .forEach(outputCollector::emit);
-      keyToValues.clear();
-    }
+  protected void beforeClose() {
+    final InMemoryTimerInternalsFactory imTimerFactory =
+      (InMemoryTimerInternalsFactory) timerInternalsFactory;
+
+    imTimerFactory.internalsMap.entrySet().stream()
+      .forEach(entry -> {
+        final K key = entry.getKey();
+        final InMemoryTimerInternals timerInternals = entry.getValue();
+
+        try {
+          // Finish any pending windows by advancing the input watermark to infinity.
+          timerInternals.advanceInputWatermark(BoundedWindow.TIMESTAMP_MAX_VALUE);
+
+          // Finally, advance the processing time to infinity to fire any timers.
+          timerInternals.advanceProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
+          timerInternals.advanceSynchronizedProcessingTime(BoundedWindow.TIMESTAMP_MAX_VALUE);
+
+          fireEligibleTimers(key, timerInternals);
+        } catch (final Exception e) {
+          e.printStackTrace();
 
 Review comment:
   Please throw an exception to fail the job.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

Mime
View raw message