falcon-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ajayyad...@apache.org
Subject [4/4] falcon git commit: FALCON-1230 Data based notification Service to notify execution instances when data becomes available. Contributed by Pavan Kumar Kolamuri.
Date Mon, 11 Jan 2016 09:47:24 GMT
FALCON-1230 Data based notification Service to notify execution instances when data becomes
available. Contributed by Pavan Kumar Kolamuri.


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/4656f692
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/4656f692
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/4656f692

Branch: refs/heads/master
Commit: 4656f692a3c96244f0291501c3b68e14af964f27
Parents: 65bd4d1
Author: Ajay Yadava <ajaynsit@gmail.com>
Authored: Mon Jan 11 14:54:34 2016 +0530
Committer: Ajay Yadava <ajaynsit@gmail.com>
Committed: Mon Jan 11 14:54:34 2016 +0530

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +
 .../execution/ProcessExecutionInstance.java     |  38 ++--
 .../notification/service/event/DataEvent.java   |  19 +-
 .../service/impl/DataAvailabilityService.java   | 210 +++++++++++++++++--
 .../request/DataNotificationRequest.java        | 124 +++++++++--
 .../org/apache/falcon/predicate/Predicate.java  |  18 +-
 .../execution/FalconExecutionServiceTest.java   |  10 +-
 .../service/DataAvailabilityServiceTest.java    | 135 ++++++++++++
 8 files changed, 482 insertions(+), 74 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index e3244de..8792f94 100755
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -12,6 +12,8 @@ Proposed Release Version: 0.9
   INCOMPATIBLE CHANGES
 
   NEW FEATURES
+    FALCON-1230 Data based notification Service to notify execution instances when data becomes
available(Pavan Kumar Kolamuri via Ajay Yadava)
+
     FALCON-1679 API to get type of scheduler(native/oozie) (Pallavi Rao)
 
     FALCON-1645 Ability to export to database(Venkat Ramachandran via Balu Vellanki)

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/main/java/org/apache/falcon/execution/ProcessExecutionInstance.java
----------------------------------------------------------------------
diff --git a/scheduler/src/main/java/org/apache/falcon/execution/ProcessExecutionInstance.java
b/scheduler/src/main/java/org/apache/falcon/execution/ProcessExecutionInstance.java
index 72e5558..8f026b7 100644
--- a/scheduler/src/main/java/org/apache/falcon/execution/ProcessExecutionInstance.java
+++ b/scheduler/src/main/java/org/apache/falcon/execution/ProcessExecutionInstance.java
@@ -17,6 +17,7 @@
  */
 package org.apache.falcon.execution;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.falcon.FalconException;
 import org.apache.falcon.entity.EntityUtil;
 import org.apache.falcon.entity.FeedHelper;
@@ -119,29 +120,36 @@ public class ProcessExecutionInstance extends ExecutionInstance {
                 continue;
             }
             Feed feed = ConfigurationStore.get().get(EntityType.FEED, input.getFeed());
+            List<Path> paths = new ArrayList<>();
             for (org.apache.falcon.entity.v0.feed.Cluster cluster : feed.getClusters().getClusters())
{
                 List<Location> locations = FeedHelper.getLocations(cluster, feed);
                 for (Location loc : locations) {
                     if (loc.getType() != LocationType.DATA) {
                         continue;
                     }
+                    paths.add(new Path(loc.getPath()));
+                }
 
-                    Predicate predicate = Predicate.createDataPredicate(loc);
-                    // To ensure we evaluate only predicates not evaluated before when an
instance is resumed.
-                    if (isResume && !awaitedPredicates.contains(predicate)) {
-                        continue;
-                    }
-                    // TODO : Revisit this once the Data Availability Service has been built
-                    DataAvailabilityService.DataRequestBuilder requestBuilder =
-                            (DataAvailabilityService.DataRequestBuilder)
-                            NotificationServicesRegistry.getService(NotificationServicesRegistry.SERVICE.DATA)
-                                    .createRequestBuilder(executionService, getId());
-                    requestBuilder.setDataLocation(new Path(loc.getPath()));
-                    NotificationServicesRegistry.register(requestBuilder.build());
-                    LOG.info("Registered for a data notification for process {} for data
location {}",
-                            process.getName(), loc.getPath());
-                    awaitedPredicates.add(predicate);
+                Predicate predicate = Predicate.createDataPredicate(paths);
+                // To ensure we evaluate only predicates not evaluated before when an instance
is resumed.
+                if (isResume && !awaitedPredicates.contains(predicate)) {
+                    continue;
                 }
+                // TODO : Revisit this once the Data Notification Service has been built
+                // TODO Very IMP :  Need to change the polling frequency
+                DataAvailabilityService.DataRequestBuilder requestBuilder =
+                        (DataAvailabilityService.DataRequestBuilder)
+                                NotificationServicesRegistry.getService(NotificationServicesRegistry.SERVICE.DATA)
+                                        .createRequestBuilder(executionService, getId());
+                requestBuilder.setLocations(paths)
+                        .setCluster(cluster.getName())
+                        .setPollingFrequencyInMillis(100)
+                        .setTimeoutInMillis(getTimeOutInMillis())
+                        .setLocations(paths);
+                NotificationServicesRegistry.register(requestBuilder.build());
+                LOG.info("Registered for a data notification for process {} for data location
{}",
+                        process.getName(), StringUtils.join(paths, ","));
+                awaitedPredicates.add(predicate);
             }
         }
     }

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/main/java/org/apache/falcon/notification/service/event/DataEvent.java
----------------------------------------------------------------------
diff --git a/scheduler/src/main/java/org/apache/falcon/notification/service/event/DataEvent.java
b/scheduler/src/main/java/org/apache/falcon/notification/service/event/DataEvent.java
index 1036339..083f66c 100644
--- a/scheduler/src/main/java/org/apache/falcon/notification/service/event/DataEvent.java
+++ b/scheduler/src/main/java/org/apache/falcon/notification/service/event/DataEvent.java
@@ -18,18 +18,18 @@
 package org.apache.falcon.notification.service.event;
 
 
-import org.apache.falcon.entity.v0.feed.LocationType;
 import org.apache.falcon.state.ID;
 import org.apache.hadoop.fs.Path;
 
+import java.util.List;
+
 /**
  * An event generated by {@link org.apache.falcon.notification.service.impl.DataAvailabilityService}
  * indicating availability or non-availability of a dataset.
  */
 public class DataEvent extends Event {
     private final ID callbackID;
-    private Path dataLocation;
-    private LocationType dataType;
+    private List<Path> dataLocations;
     private STATUS status;
 
     /**
@@ -40,10 +40,9 @@ public class DataEvent extends Event {
         UNAVAILABLE
     }
 
-    public DataEvent(ID callbackID, Path location, LocationType locType, STATUS availability)
{
+    public DataEvent(ID callbackID, List<Path> dataLocations, STATUS availability)
{
         this.callbackID = callbackID;
-        this.dataLocation = location;
-        this.dataType = locType;
+        this.dataLocations = dataLocations;
         this.status = availability;
         this.type = EventType.DATA_AVAILABLE;
     }
@@ -56,12 +55,12 @@ public class DataEvent extends Event {
         this.status = availability;
     }
 
-    public Path getDataLocation() {
-        return dataLocation;
+    public List<Path> getDataLocations() {
+        return dataLocations;
     }
 
-    public LocationType getDataType() {
-        return dataType;
+    public void setDataLocations(List<Path> locations) {
+        this.dataLocations = locations;
     }
 
     @Override

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/main/java/org/apache/falcon/notification/service/impl/DataAvailabilityService.java
----------------------------------------------------------------------
diff --git a/scheduler/src/main/java/org/apache/falcon/notification/service/impl/DataAvailabilityService.java
b/scheduler/src/main/java/org/apache/falcon/notification/service/impl/DataAvailabilityService.java
index 7ffb351..732da62 100644
--- a/scheduler/src/main/java/org/apache/falcon/notification/service/impl/DataAvailabilityService.java
+++ b/scheduler/src/main/java/org/apache/falcon/notification/service/impl/DataAvailabilityService.java
@@ -18,29 +18,62 @@
 package org.apache.falcon.notification.service.impl;
 
 import org.apache.falcon.FalconException;
+import org.apache.falcon.entity.ClusterHelper;
+import org.apache.falcon.entity.EntityUtil;
+import org.apache.falcon.entity.v0.Entity;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.cluster.Cluster;
 import org.apache.falcon.exception.NotificationServiceException;
 import org.apache.falcon.execution.NotificationHandler;
+import org.apache.falcon.hadoop.HadoopClientFactory;
 import org.apache.falcon.notification.service.FalconNotificationService;
+import org.apache.falcon.notification.service.event.DataEvent;
 import org.apache.falcon.notification.service.request.DataNotificationRequest;
 import org.apache.falcon.notification.service.request.NotificationRequest;
 import org.apache.falcon.state.ID;
+import org.apache.falcon.util.StartupProperties;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.DelayQueue;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
 
 /**
  * This notification service notifies {@link NotificationHandler} when requested data
  * becomes available. This class also supports time out, in which case it notifies about
the unavailability.
- * TODO : Complete/Modify this skeletal class
  */
 public class DataAvailabilityService implements FalconNotificationService {
 
+    private static final Logger LOG = LoggerFactory.getLogger(DataAvailabilityService.class);
+    private static final String NUM_THREADS_PROP = "scheduler.data.notification.service.threads";
+    private static final String DEFAULT_NUM_THREADS = "5";
+
+    private DelayQueue<DataNotificationRequest> delayQueue = new DelayQueue<>();
+    private ExecutorService executorService;
+    // It contains all instances which are unregistered and can be ignored.
+    private Map<ID, NotificationHandler> instancesToIgnore;
+
     @Override
     public void register(NotificationRequest request) throws NotificationServiceException
{
-        // TODO : Implement this
+        LOG.info("Registering Data notification for " + request.getCallbackId().toString());
+        DataNotificationRequest dataNotificationRequest = (DataNotificationRequest) request;
+        delayQueue.offer(dataNotificationRequest);
     }
 
     @Override
     public void unregister(NotificationHandler handler, ID listenerID) {
-        // TODO : Implement this
+        LOG.info("Removing Data notification Request with callbackID {}", listenerID.getKey());
+        instancesToIgnore.put(listenerID, handler);
     }
 
     @Override
@@ -55,40 +88,185 @@ public class DataAvailabilityService implements FalconNotificationService
{
 
     @Override
     public void init() throws FalconException {
-        // TODO : Implement this
+        int executorThreads = Integer.parseInt(StartupProperties.get().
+                getProperty(NUM_THREADS_PROP, DEFAULT_NUM_THREADS));
+        executorService = Executors.newFixedThreadPool(executorThreads);
+        for (int i = 0; i < executorThreads; i++) {
+            executorService.execute(new EventConsumer());
+        }
+        instancesToIgnore = new ConcurrentHashMap<>();
     }
 
     @Override
     public void destroy() throws FalconException {
-
+        instancesToIgnore.clear();
+        delayQueue.clear();
+        executorService.shutdown();
     }
 
     /**
      * Builds {@link DataNotificationRequest}.
      */
     public static class DataRequestBuilder extends RequestBuilder<DataNotificationRequest>
{
-        private Path dataLocation;
+        private String cluster;
+        private long pollingFrequencyInMillis;
+        private long timeoutInMillis;
+        private Map<Path, Boolean> locations;
 
         public DataRequestBuilder(NotificationHandler handler, ID callbackID) {
             super(handler, callbackID);
         }
 
-        /**
-         * @param location
-         * @return This instance
-         */
-        public DataRequestBuilder setDataLocation(Path location) {
-            this.dataLocation = location;
+        public DataRequestBuilder setLocations(List<Path> locPaths) {
+            Map<Path, Boolean> paths = new HashMap<>();
+            for (Path loc : locPaths) {
+                paths.put(loc, false);
+            }
+            this.locations = paths;
             return this;
         }
 
         @Override
         public DataNotificationRequest build() {
-            if (callbackId == null  || dataLocation == null) {
-                throw new IllegalArgumentException("Missing one or more of the mandatory
arguments:"
-                        + " callbackId, dataLocation");
+            if (callbackId == null || locations == null
+                    || cluster == null || pollingFrequencyInMillis <= 0
+                    || timeoutInMillis < pollingFrequencyInMillis) {
+                throw new IllegalArgumentException("Missing or incorrect, one or more of
the mandatory arguments:"
+                        + " callbackId, locations, dataType, cluster, pollingFrequency, waitTime");
+            }
+            return new DataNotificationRequest(handler, callbackId, cluster,
+                    pollingFrequencyInMillis, timeoutInMillis, locations);
+        }
+
+        public DataRequestBuilder setCluster(String clusterName) {
+            this.cluster = clusterName;
+            return this;
+        }
+
+        public DataRequestBuilder setPollingFrequencyInMillis(long pollingFreq) {
+            if (pollingFreq <= 0) {
+                throw new IllegalArgumentException("PollingFrequency should be greater than
zero");
+            }
+            this.pollingFrequencyInMillis = pollingFreq;
+            return this;
+        }
+
+        public DataRequestBuilder setTimeoutInMillis(long timeout) {
+            if (timeout <= 0 || timeout < pollingFrequencyInMillis) {
+                throw new IllegalArgumentException("Timeout should be positive and greater
than PollingFrequency");
+            }
+            this.timeoutInMillis = timeout;
+            return this;
+        }
+    }
+
+
+    private class EventConsumer implements Runnable {
+
+        public EventConsumer() {
+        }
+
+        @Override
+        public void run() {
+            DataNotificationRequest dataNotificationRequest;
+            while (!Thread.currentThread().isInterrupted()) {
+                try {
+                    dataNotificationRequest = delayQueue.take();
+                    boolean isUnRegistered = isUnRegistered(dataNotificationRequest);
+                    if (isUnRegistered) {
+                        continue;
+                    }
+                    boolean isDataArrived = checkConditions(dataNotificationRequest);
+                    if (isDataArrived) {
+                        notifyHandler(dataNotificationRequest, DataEvent.STATUS.AVAILABLE);
+                    } else {
+                        if (dataNotificationRequest.isTimedout()) {
+                            notifyHandler(dataNotificationRequest, DataEvent.STATUS.UNAVAILABLE);
+                            continue;
+                        }
+                        dataNotificationRequest.accessed();
+                        delayQueue.offer(dataNotificationRequest);
+                    }
+                } catch (Throwable e) {
+                    LOG.error("Error in Data Notification Service EventConsumer", e);
+                }
+            }
+        }
+
+        private void notifyHandler(DataNotificationRequest dataNotificationRequest,
+                                   DataEvent.STATUS status) {
+            DataEvent dataEvent = new DataEvent(dataNotificationRequest.getCallbackId(),
+                    dataNotificationRequest.getLocations(), status);
+            boolean isUnRegistered = isUnRegistered(dataNotificationRequest);
+            if (isUnRegistered) {
+                return;
             }
-            return new DataNotificationRequest(handler, callbackId, dataLocation);
+            try {
+                LOG.debug("Notifying Handler for Data Notification Request of id {} " ,
+                        dataNotificationRequest.getCallbackId().toString());
+                dataNotificationRequest.getHandler().onEvent(dataEvent);
+            } catch (FalconException e) {
+                LOG.error("Unable to notify Data event with id {} ",
+                        dataNotificationRequest.getCallbackId(), e);
+                // ToDo Retries for notifying
+            }
+        }
+
+        private boolean isUnRegistered(DataNotificationRequest dataNotificationRequest) {
+            if (instancesToIgnore.containsKey(dataNotificationRequest.getCallbackId())) {
+                LOG.info("Ignoring Data Notification Request of id {} ",
+                        dataNotificationRequest.getCallbackId().toString());
+                instancesToIgnore.remove(dataNotificationRequest.getCallbackId());
+                return true;
+            }
+            return false;
+        }
+
+        private boolean checkConditions(DataNotificationRequest dataNotificationRequest)
{
+            try {
+                Entity entity = EntityUtil.getEntity(EntityType.CLUSTER, dataNotificationRequest.getCluster());
+                Cluster clusterEntity = (Cluster) entity;
+                Configuration conf = ClusterHelper.getConfiguration(clusterEntity);
+                FileSystem fs = HadoopClientFactory.get().createProxiedFileSystem(conf);
+                Map<Path, Boolean> locations = dataNotificationRequest.getLocationMap();
+                List<Path> nonAvailablePaths = getUnAvailablePaths(locations);
+                updatePathsAvailability(nonAvailablePaths, fs, locations);
+                if (allPathsExist(locations)) {
+                    return true;
+                }
+            } catch (FalconException e) {
+                LOG.error("Retrieving the Cluster Entity " + e);
+            } catch (IOException e) {
+                LOG.error("Unable to connect to FileSystem " + e);
+            }
+            return false;
+        }
+
+        private void updatePathsAvailability(List<Path> unAvailablePaths, FileSystem
fs,
+                                             Map<Path, Boolean> locations) throws IOException
{
+            for (Path path : unAvailablePaths) {
+                if (fs.exists(path)) {
+                    locations.put(path, true);
+                }
+            }
+        }
+
+        private List<Path> getUnAvailablePaths(Map<Path, Boolean> locations)
{
+            List<Path> paths = new ArrayList<>();
+            for (Map.Entry<Path, Boolean> pathInfo : locations.entrySet()) {
+                if (!pathInfo.getValue()) {
+                    paths.add(pathInfo.getKey());
+                }
+            }
+            return paths;
+        }
+
+        private boolean allPathsExist(Map<Path, Boolean> locations) {
+            if (locations.containsValue(Boolean.FALSE)) {
+                return false;
+            }
+            return true;
         }
     }
+
 }

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/main/java/org/apache/falcon/notification/service/request/DataNotificationRequest.java
----------------------------------------------------------------------
diff --git a/scheduler/src/main/java/org/apache/falcon/notification/service/request/DataNotificationRequest.java
b/scheduler/src/main/java/org/apache/falcon/notification/service/request/DataNotificationRequest.java
index 8393de0..c7dd5d3 100644
--- a/scheduler/src/main/java/org/apache/falcon/notification/service/request/DataNotificationRequest.java
+++ b/scheduler/src/main/java/org/apache/falcon/notification/service/request/DataNotificationRequest.java
@@ -17,27 +17,34 @@
  */
 package org.apache.falcon.notification.service.request;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.falcon.execution.NotificationHandler;
 import org.apache.falcon.notification.service.NotificationServicesRegistry;
 import org.apache.falcon.state.ID;
 import org.apache.hadoop.fs.Path;
 
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Delayed;
+import java.util.concurrent.TimeUnit;
+
 /**
  * Request intended for {@link import org.apache.falcon.notification.service.impl.DataAvailabilityService}
  * for data notifications.
  * The setter methods of the class support chaining similar to a builder class.
- * TODO : Complete/modify this skeletal class
  */
-public class DataNotificationRequest extends NotificationRequest {
-    private final Path dataLocation;
+public class DataNotificationRequest extends NotificationRequest implements Delayed {
+    // Boolean represents path availability to avoid checking all paths for every poll.
+    private Map<Path, Boolean> locations;
+    private long pollingFrequencyInMillis;
+    private long timeoutInMillis;
     private String cluster;
+    private long accessTimeInMillis;
+    private long createdTimeInMillis;
+    // Represents request was accessed by DataAvailability service first time or not.
+    private boolean isFirst;
 
-    /**
-     * @return data location to be watched.
-     */
-    public Path getDataLocation() {
-        return dataLocation;
-    }
 
     /**
      * Given a number of instances, should the service wait for exactly those many,
@@ -53,27 +60,106 @@ public class DataNotificationRequest extends NotificationRequest {
      * Constructor.
      * @param notifHandler
      * @param callbackId
+     * @param cluster
+     * @param pollingFrequencyInMillis
+     * @param timeoutInMillis
+     * @param locations
      */
-    public DataNotificationRequest(NotificationHandler notifHandler, ID callbackId, Path
location) {
+    public DataNotificationRequest(NotificationHandler notifHandler, ID callbackId,
+                                   String cluster, long pollingFrequencyInMillis,
+                                   long timeoutInMillis, Map<Path, Boolean> locations)
{
         this.handler = notifHandler;
         this.callbackId = callbackId;
-        this.dataLocation = location;
         this.service = NotificationServicesRegistry.SERVICE.DATA;
+        this.cluster = cluster;
+        this.pollingFrequencyInMillis = pollingFrequencyInMillis;
+        this.timeoutInMillis = timeoutInMillis;
+        this.locations = locations;
+        this.accessTimeInMillis = System.currentTimeMillis();
+        this.createdTimeInMillis = accessTimeInMillis;
+        this.isFirst = true;
+    }
+
+
+    public void accessed() {
+        this.accessTimeInMillis = System.currentTimeMillis();
     }
 
-    /**
-     * @return cluster name
-     */
     public String getCluster() {
         return cluster;
     }
 
+
+    public boolean isTimedout() {
+        long currentTimeInMillis = System.currentTimeMillis();
+        if (currentTimeInMillis - createdTimeInMillis > timeoutInMillis) {
+            return true;
+        }
+        return false;
+    }
+
+
     /**
-     * @param clusterName
-     * @return This instance
+     * Obtain list of paths from locations map.
+     * @return List of paths to check.
      */
-    public DataNotificationRequest setCluster(String clusterName) {
-        this.cluster = clusterName;
-        return this;
+    public List<Path> getLocations() {
+        if (this.locations == null) {
+            return null;
+        }
+        List<Path> paths = new ArrayList<>();
+        for (Path path : this.locations.keySet()) {
+            paths.add(path);
+        }
+        return paths;
     }
+
+    /**
+     * @return Map of locations and their availabilities.
+     */
+    public Map<Path, Boolean> getLocationMap() {
+        return this.locations;
+    }
+
+    @Override
+    public long getDelay(TimeUnit unit) {
+        if (isFirst) {
+            this.isFirst = false;
+            return 0;
+        }
+        long age = System.currentTimeMillis() - accessTimeInMillis;
+        return unit.convert(pollingFrequencyInMillis - age, TimeUnit.MILLISECONDS);
+    }
+
+    @Override
+    public int compareTo(Delayed other) {
+        return (int) (this.getDelay(TimeUnit.MILLISECONDS) - other.getDelay(TimeUnit.MILLISECONDS));
+    }
+
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        DataNotificationRequest that = (DataNotificationRequest) o;
+        if (!StringUtils.equals(cluster, that.cluster)) {
+            return false;
+        }
+        if (!locations.equals(that.locations)) {
+            return false;
+        }
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        int result = cluster.hashCode();
+        result = 31 * result + (locations != null ? locations.hashCode() : 0);
+        return result;
+    }
+
+
 }

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/main/java/org/apache/falcon/predicate/Predicate.java
----------------------------------------------------------------------
diff --git a/scheduler/src/main/java/org/apache/falcon/predicate/Predicate.java b/scheduler/src/main/java/org/apache/falcon/predicate/Predicate.java
index c7b4f12..c248db6 100644
--- a/scheduler/src/main/java/org/apache/falcon/predicate/Predicate.java
+++ b/scheduler/src/main/java/org/apache/falcon/predicate/Predicate.java
@@ -17,8 +17,8 @@
  */
 package org.apache.falcon.predicate;
 
+import org.apache.commons.lang3.StringUtils;
 import org.apache.falcon.FalconException;
-import org.apache.falcon.entity.v0.feed.Location;
 import org.apache.falcon.execution.NotificationHandler;
 import org.apache.falcon.notification.service.event.DataEvent;
 import org.apache.falcon.notification.service.event.Event;
@@ -26,6 +26,7 @@ import org.apache.falcon.notification.service.event.EventType;
 import org.apache.falcon.notification.service.event.RerunEvent;
 import org.apache.falcon.notification.service.event.TimeElapsedEvent;
 import org.apache.falcon.state.ID;
+import org.apache.hadoop.fs.Path;
 
 import java.io.Serializable;
 import java.util.Collections;
@@ -158,15 +159,15 @@ public class Predicate implements Serializable {
     /**
      * Creates a predicate of type DATA.
      *
-     * @param location
+     * @param paths List of paths to check
      * @return
      */
-    public static Predicate createDataPredicate(Location location) {
+    public static Predicate createDataPredicate(List<Path> paths) {
         return new Predicate(TYPE.DATA)
-                .addClause("path", (location == null) ? ANY : location.getPath())
-                .addClause("type", (location == null) ? ANY : location.getType());
+                .addClause("path", StringUtils.join(paths, ","));
     }
 
+
     /**
      * Creates a predicate of type JOB_COMPLETION.
      *
@@ -202,11 +203,8 @@ public class Predicate implements Serializable {
     public static Predicate getPredicate(Event event) throws FalconException {
         if (event.getType() == EventType.DATA_AVAILABLE) {
             DataEvent dataEvent = (DataEvent) event;
-            if (dataEvent.getDataLocation() != null && dataEvent.getDataType() !=
null) {
-                Location loc = new Location();
-                loc.setPath(dataEvent.getDataLocation().toString());
-                loc.setType(dataEvent.getDataType());
-                return createDataPredicate(loc);
+            if (dataEvent.getDataLocations() != null) {
+                return createDataPredicate(dataEvent.getDataLocations());
             } else {
                 throw new FalconException("Event does not have enough data to create a predicate");
             }

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/test/java/org/apache/falcon/execution/FalconExecutionServiceTest.java
----------------------------------------------------------------------
diff --git a/scheduler/src/test/java/org/apache/falcon/execution/FalconExecutionServiceTest.java
b/scheduler/src/test/java/org/apache/falcon/execution/FalconExecutionServiceTest.java
index d66972c..d08f7d4 100644
--- a/scheduler/src/test/java/org/apache/falcon/execution/FalconExecutionServiceTest.java
+++ b/scheduler/src/test/java/org/apache/falcon/execution/FalconExecutionServiceTest.java
@@ -22,7 +22,6 @@ import org.apache.falcon.cluster.util.EmbeddedCluster;
 import org.apache.falcon.entity.v0.Entity;
 import org.apache.falcon.entity.v0.EntityType;
 import org.apache.falcon.entity.v0.Frequency;
-import org.apache.falcon.entity.v0.feed.LocationType;
 import org.apache.falcon.entity.v0.process.Input;
 import org.apache.falcon.entity.v0.process.Process;
 import org.apache.falcon.notification.service.NotificationServicesRegistry;
@@ -66,6 +65,7 @@ import org.testng.annotations.Test;
 import java.io.IOException;
 import java.lang.reflect.Method;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Date;
 import java.util.Iterator;
@@ -331,7 +331,7 @@ public class FalconExecutionServiceTest extends AbstractSchedulerTestBase
{
         Mockito.verify(mockTimeService).unregister(FalconExecutionService.get(), executorID);
     }
 
-    @Test
+    @Test(enabled = false)
     public void testTimeOut() throws Exception {
         storeEntity(EntityType.PROCESS, "summarize3");
         Process process = getStore().get(EntityType.PROCESS, "summarize3");
@@ -602,7 +602,8 @@ public class FalconExecutionServiceTest extends AbstractSchedulerTestBase
{
                     new DateTime(process.getClusters().getClusters().get(0).getValidity().getEnd()),
                     new DateTime(start.getTime() + instanceOffset));
         case DATA:
-            DataEvent dataEvent = new DataEvent(id, new Path("/projects/falcon/clicks"),
LocationType.DATA,
+            DataEvent dataEvent = new DataEvent(id,
+                    new ArrayList<Path>(Arrays.asList(new Path("/projects/falcon/clicks"))),
                     DataEvent.STATUS.AVAILABLE);
             return dataEvent;
         default:
@@ -614,7 +615,8 @@ public class FalconExecutionServiceTest extends AbstractSchedulerTestBase
{
         ID id = new InstanceID(instance);
         switch (type) {
         case DATA:
-            DataEvent dataEvent = new DataEvent(id, new Path("/projects/falcon/clicks"),
LocationType.DATA,
+            DataEvent dataEvent = new DataEvent(id,
+                    new ArrayList<Path>(Arrays.asList(new Path("/projects/falcon/clicks"))),
                     DataEvent.STATUS.AVAILABLE);
             return dataEvent;
         case JOB_SCHEDULE:

http://git-wip-us.apache.org/repos/asf/falcon/blob/4656f692/scheduler/src/test/java/org/apache/falcon/notification/service/DataAvailabilityServiceTest.java
----------------------------------------------------------------------
diff --git a/scheduler/src/test/java/org/apache/falcon/notification/service/DataAvailabilityServiceTest.java
b/scheduler/src/test/java/org/apache/falcon/notification/service/DataAvailabilityServiceTest.java
new file mode 100644
index 0000000..20c99b5
--- /dev/null
+++ b/scheduler/src/test/java/org/apache/falcon/notification/service/DataAvailabilityServiceTest.java
@@ -0,0 +1,135 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.falcon.notification.service;
+
+import org.apache.falcon.FalconException;
+import org.apache.falcon.cluster.util.EmbeddedCluster;
+import org.apache.falcon.entity.AbstractTestBase;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.process.Process;
+import org.apache.falcon.execution.NotificationHandler;
+import org.apache.falcon.notification.service.event.DataEvent;
+import org.apache.falcon.notification.service.impl.DataAvailabilityService;
+import org.apache.falcon.notification.service.request.DataNotificationRequest;
+import org.apache.falcon.state.EntityClusterID;
+import org.apache.falcon.state.ID;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.mockito.ArgumentCaptor;
+import org.mockito.Mockito;
+import org.testng.Assert;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Test cases for DataNotificationService.
+ */
+public class DataAvailabilityServiceTest extends AbstractTestBase {
+
+    private static NotificationHandler handler = Mockito.mock(NotificationHandler.class);
+    private static DataAvailabilityService dataAvailabilityService = Mockito.spy(new DataAvailabilityService());
+    private static final String BASE_PATH = "jail://testCluster:00/data/user";
+
+    @BeforeClass
+    public void setup() throws Exception {
+        this.dfsCluster = EmbeddedCluster.newCluster("testCluster");
+        this.conf = dfsCluster.getConf();
+        storeEntity(EntityType.CLUSTER, "testCluster");
+        dataAvailabilityService.init();
+    }
+
+    @Test
+    public void testDataNotificationServiceWithVaryingRequests() throws IOException,
+            FalconException, InterruptedException {
+        FileSystem fs = FileSystem.get(conf);
+        // invalid request
+        org.apache.falcon.entity.v0.process.Process mockProcess = new Process();
+        mockProcess.setName("test");
+        EntityClusterID id = new EntityClusterID(mockProcess, "testCluster");
+
+        DataNotificationRequest dataNotificationRequest = getDataNotificationRequest(new
ArrayList<Path>(), id);
+
+        dataAvailabilityService.register(dataNotificationRequest);
+        Thread.sleep(1000);
+        Mockito.verify(handler, Mockito.times(1)).onEvent(Mockito.any(DataEvent.class));
+        ArgumentCaptor<DataEvent> captor = ArgumentCaptor.forClass(DataEvent.class);
+        Mockito.verify(handler).onEvent(captor.capture());
+        Assert.assertEquals(captor.getValue().getStatus(), DataEvent.STATUS.AVAILABLE);
+        Assert.assertEquals(captor.getValue().getTarget(), dataNotificationRequest.getCallbackId());
+
+        cleanupDir(fs, BASE_PATH);
+
+        String path1 = BASE_PATH + "/" + "2015";
+        String path2 = BASE_PATH + "/" + "2016";
+
+        fs.create(new Path(path1));
+        List<Path> paths = new ArrayList<>();
+        paths.add(new Path(path1));
+        paths.add(new Path(path2));
+
+        // Adding paths and verifying its in queue
+        dataNotificationRequest = getDataNotificationRequest(paths, id);
+        dataAvailabilityService.register(dataNotificationRequest);
+        Mockito.verify(handler, Mockito.times(1)).onEvent(Mockito.any(DataEvent.class));
+
+
+        // create path and check availability status
+        fs.create(new Path(path2));
+        Thread.sleep(1000);
+        Mockito.verify(handler, Mockito.times(2)).onEvent(captor.capture());
+        Assert.assertEquals(captor.getValue().getStatus(), DataEvent.STATUS.AVAILABLE);
+        Assert.assertEquals(captor.getValue().getTarget(), dataNotificationRequest.getCallbackId());
+
+
+        // Adding one more path and verify Unavailable case
+        String path3 = BASE_PATH + "/" + "2017";
+        paths.add(new Path(path3));
+        dataNotificationRequest = getDataNotificationRequest(paths, id);
+        dataAvailabilityService.register(dataNotificationRequest);
+        Thread.sleep(2000);
+        Mockito.verify(handler, Mockito.times(3)).onEvent(captor.capture());
+        Assert.assertEquals(captor.getValue().getStatus(), DataEvent.STATUS.UNAVAILABLE);
+        Assert.assertEquals(captor.getValue().getTarget(), dataNotificationRequest.getCallbackId());
+
+        dataNotificationRequest = getDataNotificationRequest(paths, id);
+        dataAvailabilityService.register(dataNotificationRequest);
+        dataAvailabilityService.unregister(dataNotificationRequest.getHandler(),
+                dataNotificationRequest.getCallbackId());
+        fs.create(new Path(path3));
+        Thread.sleep(1000);
+        // It wont notify as event was unregistered
+        Mockito.verify(handler, Mockito.times(3)).onEvent(captor.capture());
+    }
+
+    private void cleanupDir(FileSystem fs, String basePath) throws IOException {
+        fs.delete(new Path(basePath), true);
+    }
+
+    private DataNotificationRequest getDataNotificationRequest(List<Path> locations,
ID id) {
+        DataAvailabilityService.DataRequestBuilder dataRequestBuilder =
+                new DataAvailabilityService.DataRequestBuilder(handler, id);
+        dataRequestBuilder.setPollingFrequencyInMillis(20).setCluster("testCluster")
+                .setTimeoutInMillis(100).setLocations(locations);
+        return dataRequestBuilder.build();
+    }
+
+}


Mime
View raw message