apex-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (APEXMALHAR-1897) Create ManagedState
Date Sun, 20 Mar 2016 15:33:33 GMT

    [ https://issues.apache.org/jira/browse/APEXMALHAR-1897?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15203328#comment-15203328 ]

ASF GitHub Bot commented on APEXMALHAR-1897:
--------------------------------------------

Github user amberarrow commented on a diff in the pull request:

    https://github.com/apache/incubator-apex-malhar/pull/145#discussion_r56766289
  
    --- Diff: library/src/main/java/com/datatorrent/lib/state/managed/ManagedTimeUnifiedStateImpl.java ---
    @@ -0,0 +1,238 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +package com.datatorrent.lib.state.managed;
    +
    +import java.io.DataInputStream;
    +import java.io.DataOutputStream;
    +import java.io.IOException;
    +import java.util.concurrent.Future;
    +import java.util.concurrent.LinkedBlockingQueue;
    +
    +import org.slf4j.Logger;
    +import org.slf4j.LoggerFactory;
    +
    +import org.apache.hadoop.fs.LocatedFileStatus;
    +import org.apache.hadoop.fs.RemoteIterator;
    +
    +import com.google.common.base.Preconditions;
    +import com.google.common.collect.Queues;
    +import com.google.common.util.concurrent.Futures;
    +
    +import com.datatorrent.lib.fileaccess.FileAccess;
    +import com.datatorrent.lib.state.BucketedState;
    +import com.datatorrent.netlet.util.Slice;
    +
    +/**
    + * In this implementation of {@link ManagedState} the buckets in memory are time-buckets.
    + * <p/>
    + *
    + * <b>Difference from {@link ManagedTimeStateImpl}</b>: <br/>
    + * <ol>
    + * <li>The main buckets in {@link ManagedTimeStateImpl} are unique adhoc long ids which the user provides with the
    + * key. In this implementation the main buckets are time buckets. The user provides just the time and the time bucket is
    + * derived from it.
    + * </li>
    + * <br/>
    + *
    + * <li>In regards to the bucket data on disk, in {@link ManagedTimeStateImpl} the buckets are persisted on disk
    + * with each bucket data further grouped into time-buckets: {base_path}/{bucketId}/{time-bucket id}. <br/>
    + * In this implementation operator id is used as bucketId (on disk) and there is just one time-bucket under a
    + * particular operator id:
    + * {base_path}/{operator id}/{time bucket id}.
    + * </li>
    + * <br/>
    + *
    + * <li>In {@link ManagedTimeStateImpl} a bucket belongs to just one partition. Multiple partitions cannot write to
    + * the same bucket. <br/>
    + * In this implementation multiple partitions can be working with the same time-bucket (since time-bucket is derived
    + * from time). This works because on the disk the time-bucket data is segregated under each operator id.
    + * </li>
    + * <br/>
    + *
    + * <li>While {@link ManagedTimeStateImpl} can support dynamic partitioning by pre-allocating buckets this will not
    + * be able to support dynamic partitioning efficiently.
    + * </li>
    +
    + * </ol>
    + */
    +public class ManagedTimeUnifiedStateImpl extends AbstractManagedStateImpl implements
BucketedState
    +{
    +  private final transient LinkedBlockingQueue<Long> purgedTimeBuckets = Queues.newLinkedBlockingQueue();
    +
    +  public ManagedTimeUnifiedStateImpl()
    +  {
    +    bucketsFileSystem = new TimeUnifiedBucketsFileSystem();
    +  }
    +
    +  @Override
    +  public int getNumBuckets()
    +  {
    +    return timeBucketAssigner.getNumBuckets();
    +  }
    +
    +  @Override
    +  public void put(long time, Slice key, Slice value)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return;
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +
    +    buckets[bucketIdx].put(key, timeBucket, value);
    +
    +  }
    +
    +  @Override
    +  public Slice getSync(long time, Slice key)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return BucketedState.EXPIRED;
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +    return buckets[bucketIdx].get(key, timeBucket, Bucket.ReadSource.ALL);
    +  }
    +
    +  @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")
    +  @Override
    +  public Future<Slice> getAsync(long time, Slice key)
    +  {
    +    long timeBucket = timeBucketAssigner.getTimeBucketFor(time);
    +    if (timeBucket == -1) {
    +      //time is expired so return null.
    +      return Futures.immediateFuture(BucketedState.EXPIRED);
    +    }
    +    int bucketIdx = prepareBucket(timeBucket);
    +    Bucket bucket = buckets[bucketIdx];
    +    synchronized (bucket) {
    +      Slice cachedVal = buckets[bucketIdx].get(key, timeBucket, Bucket.ReadSource.MEMORY);
    --- End diff --
    
    buckets[bucketIdx] => bucket (the local variable `bucket` already holds this element and is the object being synchronized on)


> Create ManagedState
> -------------------
>
>                 Key: APEXMALHAR-1897
>                 URL: https://issues.apache.org/jira/browse/APEXMALHAR-1897
>             Project: Apache Apex Malhar
>          Issue Type: Sub-task
>            Reporter: Chandni Singh
>            Assignee: Chandni Singh
>             Fix For: 3.4.0
>
>
> ManagedState is described in the document below:
> https://docs.google.com/document/d/1gRWN9ufKSZSZD0N-pthlhpC9TZ8KwJ6hJlAX6nxl5f8/edit#heading=h.z87ti1fwyt0t



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message