apex-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (APEXMALHAR-1897) Create ManagedState
Date Mon, 21 Mar 2016 04:35:25 GMT

    [ https://issues.apache.org/jira/browse/APEXMALHAR-1897?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15203695#comment-15203695
] 

ASF GitHub Bot commented on APEXMALHAR-1897:
--------------------------------------------

Github user ilooner commented on a diff in the pull request:

    https://github.com/apache/incubator-apex-malhar/pull/145#discussion_r56781864
  
    --- Diff: library/src/main/java/com/datatorrent/lib/state/managed/ManagedTimeUnifiedStateImpl.java
---
    @@ -0,0 +1,243 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package com.datatorrent.lib.state.managed;
    +
    +import java.io.DataInputStream;
    +import java.io.DataOutputStream;
    +import java.io.IOException;
    +import java.util.concurrent.Future;
    +import java.util.concurrent.LinkedBlockingQueue;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import org.apache.hadoop.fs.LocatedFileStatus;
    +import org.apache.hadoop.fs.RemoteIterator;
    +
    +import com.google.common.base.Preconditions;
    +import com.google.common.collect.Queues;
    +import com.google.common.util.concurrent.Futures;
    +
    +import com.datatorrent.lib.fileaccess.FileAccess;
    +import com.datatorrent.lib.state.BucketedState;
    +import com.datatorrent.netlet.util.Slice;
    +
    +/**
    + * In this implementation of {@link ManagedState} the buckets in memory are time-buckets.
    + * <p/>
    + *
    + * <b>Difference from {@link ManagedTimeStateImpl}</b>: <br/>
    + * <ol>
    + * <li>The main buckets in {@link ManagedTimeStateImpl} are unique ad hoc long ids which the user provides with the
    + * key. In this implementation the main buckets are time buckets. The user provides just the time and the time bucket is
    + * derived from it.
    + * </li>
    + * <br/>
    + *
    + * <li>With regard to the bucket data on disk, in {@link ManagedTimeStateImpl} the buckets are persisted on disk
    + * with each bucket's data further grouped into time-buckets: {base_path}/{bucketId}/{time-bucket id}. <br/>
    + * In this implementation operator id is used as bucketId (on disk) and there is just one time-bucket under a
    + * particular operator id:
    + * {base_path}/{operator id}/{time bucket id}.
    + * </li>
    + * <br/>
    + *
    + * <li>In {@link ManagedTimeStateImpl} a bucket belongs to just one partition.
Multiple partitions cannot write to
    + * the same bucket. <br/>
    + * In this implementation multiple partitions can be working with the same time-bucket
(since time-bucket is derived
    + * from time). This works because on the disk the time-bucket data is segregated under
each operator id.
    + * </li>
    + * <br/>
    + *
    + * <li>While {@link ManagedTimeStateImpl} can support dynamic partitioning by pre-allocating buckets, this
    + * implementation cannot support dynamic partitioning efficiently.
    + * </li>
    +
    + * </ol>
    + */
    +public class ManagedTimeUnifiedStateImpl extends AbstractManagedStateImpl implements
BucketedState
    +{
    +  /**
    +   * Time buckets handed to {@link #purgeTimeBucketsLessThanEqualTo(long)}. The queue is drained in
    +   * {@link #endWindow()}, where the matching in-memory buckets are torn down.
    +   */
    +  private final transient LinkedBlockingQueue<Long> purgedTimeBuckets = Queues.newLinkedBlockingQueue();
    +
    +  /**
    +   * Creates the state and installs a {@link TimeUnifiedBucketsFileSystem} as the buckets file system.
    +   */
    +  public ManagedTimeUnifiedStateImpl()
    +  {
    +    this.bucketsFileSystem = new TimeUnifiedBucketsFileSystem();
    +  }
    +
    +  /**
    +   * Reports the number of buckets, which here is the number of buckets of the time-bucket assigner.
    +   *
    +   * @return the value of {@code timeBucketAssigner.getNumBuckets()}.
    +   */
    +  @Override
    +  public int getNumBuckets()
    +  {
    +    final int numTimeBuckets = timeBucketAssigner.getNumBuckets();
    +    return numTimeBuckets;
    +  }
    +
    +  /**
    +   * Stores a key/value pair in the bucket that corresponds to the time bucket derived from {@code time}.
    +   * When the assigner reports the time as expired (time bucket of -1) the pair is silently dropped.
    +   *
    +   * @param time  time from which the time bucket is derived.
    +   * @param key   key to store.
    +   * @param value value to store.
    +   */
    +  @Override
    +  public void put(long time, Slice key, Slice value)
    +  {
    +    final long timeBucketId = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucketId != -1) {
    +      final int idx = prepareBucket(timeBucketId);
    +      buckets[idx].put(key, timeBucketId, value);
    +    }
    +    //otherwise the time is expired and there is nothing to store.
    +  }
    +
    +  /**
    +   * Looks up the value of {@code key} in the time bucket derived from {@code time}, reading from all
    +   * sources ({@code Bucket.ReadSource.ALL}) while holding the bucket's monitor.
    +   *
    +   * @param time time from which the time bucket is derived.
    +   * @param key  key to look up.
    +   * @return the result of the bucket lookup, or {@link BucketedState#EXPIRED} when the time is expired.
    +   */
    +  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
    +  @Override
    +  public Slice getSync(long time, Slice key)
    +  {
    +    final long timeBucketId = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucketId != -1) {
    +      final int idx = prepareBucket(timeBucketId);
    +      final Bucket target = buckets[idx];
    +
    +      synchronized (target) {
    +        return target.get(key, timeBucketId, Bucket.ReadSource.ALL);
    +      }
    +    }
    +    //a time bucket of -1 means the time is expired.
    +    return BucketedState.EXPIRED;
    +  }
    +
    +  /**
    +   * Asynchronous lookup of {@code key} in the time bucket derived from {@code time}.
    +   * <p/>
    +   * The bucket's memory is checked first while holding the bucket's monitor; on a hit a completed future is
    +   * returned. On a miss a {@link KeyFetchTask} is submitted to the reader service and its future is returned.
    +   *
    +   * @param time time from which the time bucket is derived.
    +   * @param key  key to look up.
    +   * @return a future of the value; a completed future holding {@link BucketedState#EXPIRED} when the time
    +   * is expired.
    +   */
    +  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
    +  @Override
    +  public Future<Slice> getAsync(long time, Slice key)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return a completed future holding the expired slice.
    +      return Futures.immediateFuture(BucketedState.EXPIRED);
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +    Bucket bucket = buckets[bucketIdx];
    +    synchronized (bucket) {
    +      //read through the reference that is locked (as getSync does) instead of indexing the array again,
    +      //so the bucket that is read is always the bucket whose monitor is held.
    +      Slice cachedVal = bucket.get(key, timeBucket, Bucket.ReadSource.MEMORY);
    +      if (cachedVal != null) {
    +        return Futures.immediateFuture(cachedVal);
    +      }
    +      return readerService.submit(new KeyFetchTask(bucket, key, timeBucket, throwable));
    +    }
    +  }
    +
    +  /**
    +   * Runs the parent's end-of-window handling and then drains the queue of purged time buckets. For every
    +   * purged time bucket, the in-memory bucket at its index is torn down and cleared, but only if that slot
    +   * still holds a bucket with the same id.
    +   */
    +  @Override
    +  public void endWindow()
    +  {
    +    super.endWindow();
    +
    +    //tear down all the purged time buckets
    +    for (Long purged = purgedTimeBuckets.poll(); purged != null; purged = purgedTimeBuckets.poll()) {
    +      int idx = getBucketIdx(purged);
    +      if (buckets[idx] == null || buckets[idx].getBucketId() != purged) {
    +        //slot is empty or holds a bucket with a different id; nothing to tear down.
    +        continue;
    +      }
    +      buckets[idx].teardown();
    +      buckets[idx] = null;
    +    }
    +  }
    +
    +  /**
    +   * Resolves a conflict at {@code bucketIdx} by replacing the bucket currently in that slot with a new
    +   * bucket for {@code newBucketId}.
    +   *
    +   * @param bucketIdx   index of the slot that holds the conflicting bucket.
    +   * @param newBucketId id of the time bucket that needs the slot; must be greater than the id of the bucket
    +   *                    currently in the slot.
    +   * @throws IllegalArgumentException if the current bucket's id is not less than {@code newBucketId}.
    +   */
    +  @Override
    +  protected void handleBucketConflict(int bucketIdx, long newBucketId)
    +  {
    +    Preconditions.checkArgument(buckets[bucketIdx].getBucketId() < newBucketId,
    +        "new time bucket should have a value greater than the old time bucket");
    +    //Time buckets are purged periodically so a conflict here is expected: the old bucket is torn down and
    +    //a bucket for the new time bucket takes over the slot.
    +    buckets[bucketIdx].teardown();
    +    buckets[bucketIdx] = newBucket(newBucketId);
    +    buckets[bucketIdx].setup(this);
    +  }
    +
    +  /**
    +   * Records {@code timeBucket} in the purged queue, which {@link #endWindow()} drains to tear down the
    +   * matching in-memory bucket, and then delegates to the parent implementation.
    +   *
    +   * @param timeBucket time bucket passed on to the parent's purge.
    +   */
    +  @Override
    +  public void purgeTimeBucketsLessThanEqualTo(long timeBucket)
    +  {
    +    final Long purged = timeBucket;
    +    purgedTimeBuckets.add(purged);
    +    super.purgeTimeBucketsLessThanEqualTo(timeBucket);
    +  }
    +
    +  /**
    +   * This uses operator id instead of bucket id as the name of parent folder of time-buckets.
This is because
    +   * multiple partitions may work on same time-buckets.
    +   */
    +  public static class TimeUnifiedBucketsFileSystem extends BucketsFileSystem
    +  {
    --- End diff --
    
    Missing Override annotations on methods for this class.


> Create ManagedState
> -------------------
>
>                 Key: APEXMALHAR-1897
>                 URL: https://issues.apache.org/jira/browse/APEXMALHAR-1897
>             Project: Apache Apex Malhar
>          Issue Type: Sub-task
>            Reporter: Chandni Singh
>            Assignee: Chandni Singh
>             Fix For: 3.4.0
>
>
> ManagedState is described in the document below:
> https://docs.google.com/document/d/1gRWN9ufKSZSZD0N-pthlhpC9TZ8KwJ6hJlAX6nxl5f8/edit#heading=h.z87ti1fwyt0t



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message