falcon-commits mailing list archives

From pisayc...@apache.org
Subject [1/3] falcon git commit: Add ProcessUpdateTest, PipelineInstanceDependencyTest and other tests and test fixes. Contributed by Raghav Gautam and Paul Isaychuk
Date Tue, 20 Oct 2015 09:37:52 GMT
Repository: falcon
Updated Branches:
  refs/heads/master 5a55baed3 -> 9e6d5a6c5


http://git-wip-us.apache.org/repos/asf/falcon/blob/9e6d5a6c/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/triage/PipelineInstanceDependencyTest.java
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/triage/PipelineInstanceDependencyTest.java
b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/triage/PipelineInstanceDependencyTest.java
new file mode 100644
index 0000000..81d5fcc
--- /dev/null
+++ b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/triage/PipelineInstanceDependencyTest.java
@@ -0,0 +1,335 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.regression.triage;
+
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency;
+import org.apache.falcon.regression.Entities.FeedMerlin;
+import org.apache.falcon.regression.Entities.ProcessMerlin;
+import org.apache.falcon.regression.core.bundle.Bundle;
+import org.apache.falcon.regression.core.helpers.ColoHelper;
+import org.apache.falcon.regression.core.helpers.entity.AbstractEntityHelper;
+import org.apache.falcon.regression.core.util.AssertUtil;
+import org.apache.falcon.regression.core.util.BundleUtil;
+import org.apache.falcon.regression.core.util.EntityLineageUtil;
+import org.apache.falcon.regression.core.util.EntityLineageUtil.PipelineEntityType;
+import org.apache.falcon.regression.core.util.HadoopUtil;
+import org.apache.falcon.regression.core.util.InstanceUtil;
+import org.apache.falcon.regression.core.util.OSUtil;
+import org.apache.falcon.regression.core.util.TimeUtil;
+import org.apache.falcon.regression.testHelper.BaseTestClass;
+import org.apache.falcon.resource.APIResult;
+import org.apache.falcon.resource.InstanceDependencyResult;
+import org.apache.falcon.resource.LineageGraphResult;
+import org.apache.falcon.resource.LineageGraphResult.Edge;
+import org.apache.falcon.resource.SchedulableEntityInstance;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.security.authentication.client.AuthenticationException;
+import org.apache.log4j.Logger;
+import org.apache.oozie.client.CoordinatorAction;
+import org.apache.oozie.client.OozieClient;
+import org.joda.time.DateTime;
+import org.testng.Assert;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.DataProvider;
+import org.testng.annotations.Test;
+
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/**
+ * Test for instance dependency endpoint.
+ */
+@Test(groups = "embedded")
+public class PipelineInstanceDependencyTest extends BaseTestClass {
+
+    private ColoHelper cluster = servers.get(0);
+    private FileSystem clusterFS = serverFS.get(0);
+    private OozieClient clusterOC = serverOC.get(0);
+    private String baseTestHDFSDir = cleanAndGetTestDir();
+    private String aggregateWorkflowDir = baseTestHDFSDir + "/aggregator";
+    private String feedInputPath = baseTestHDFSDir + "/input" + MINUTE_DATE_PATTERN;
+    private String feedOutputPath = baseTestHDFSDir + "/output-data" + MINUTE_DATE_PATTERN;
+    private final String startTimeStr = "2010-01-02T01:00Z";
+    private final DateTime startTime = TimeUtil.oozieDateToDate(startTimeStr);
+    private final String endTimeStr = "2010-01-02T01:11Z";
+    private List<String> inputFeedNames, outputFeedNames, processNames;
+    private List<Integer> inputFeedFrequencies;
+    private static final Logger LOGGER = Logger.getLogger(PipelineInstanceDependencyTest.class);
+    private String clusterName;
+
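+    // Orders dependency instances deterministically: by tag ("Input"/"Output"), then cluster,
+    // then entity type, then instance time, so expected and actual lists can be compared.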
+    private static final Comparator<SchedulableEntityInstance> DEPENDENCY_COMPARATOR =
+        new Comparator<SchedulableEntityInstance>() {
+            @Override
+            public int compare(SchedulableEntityInstance o1, SchedulableEntityInstance o2) {
+                int tagDiff = o1.getTags().compareTo(o2.getTags());
+                if (tagDiff != 0) {
+                    return tagDiff;
+                }
+                int clusterDiff = o1.getCluster().compareTo(o2.getCluster());
+                if (clusterDiff != 0) {
+                    return clusterDiff;
+                }
+                int typeDiff = o1.getEntityType().compareTo(o2.getEntityType());
+                if (typeDiff != 0) {
+                    return typeDiff;
+                }
+                int dateDiff = o1.getInstanceTime().compareTo(o2.getInstanceTime());
+                if (dateDiff != 0) {
+                    return dateDiff;
+                }
+                return 0;
+            }
+        };
+    private final Comparator<Edge> edgeComparator =
+        new Comparator<Edge>() {
+            @Override
+            public int compare(Edge o1, Edge o2) {
+                return o1.toString().compareTo(o2.toString());
+            }
+        };
+
+    @BeforeClass(alwaysRun = true)
+    public void createTestData() throws Exception {
+        LOGGER.info("in @BeforeClass");
+        HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.RESOURCES_OOZIE);
+    }
+
+    /**
+     *  The scenario that we will set up looks like:<br>
+     *  inputFeed1 -> process1 -> outputFeed1 -> process2 -> outputFeed2 -> process3 -> outputFeed3.
+     * @throws Exception
+     */
+    @BeforeMethod(alwaysRun = true)
+    public void setup() throws Exception {
+        bundles[0] = BundleUtil.readELBundle();
+        bundles[0] = new Bundle(bundles[0], cluster);
+        bundles[0].generateUniqueBundle(this);
+        clusterName = bundles[0].getClusterNames().get(0);
+        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
+        bundles[0].setProcessValidity(startTimeStr, endTimeStr);
+        bundles[0].setProcessPeriodicity(5, Frequency.TimeUnit.minutes);
+        bundles[0].setInputFeedDataPath(feedInputPath);
+        bundles[0].setOutputFeedPeriodicity(5, Frequency.TimeUnit.minutes);
+        bundles[0].setOutputFeedLocationData(feedOutputPath);
+        bundles[0].submitFeedsScheduleProcess(prism);
+        final String oldInputFeedName = bundles[0].getInputFeedNameFromBundle();
+        final String oldOutputFeedName = bundles[0].getOutputFeedNameFromBundle();
+        final String oldProcessName = bundles[0].getProcessName();
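+        // Frequencies (in minutes) at which each process's input-feed instances are expected:
+        // the EL bundle's original input feed every 20 minutes, the 5-minute output feeds downstream.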
+        inputFeedFrequencies = Arrays.asList(20, 5, 5);
+        inputFeedNames = Arrays.asList(oldInputFeedName, oldOutputFeedName, oldOutputFeedName + "-2");
+        outputFeedNames = Arrays.asList(oldOutputFeedName, oldOutputFeedName + "-2", oldOutputFeedName + "-3");
+        processNames = Arrays.asList(oldProcessName, oldProcessName + "-2", oldProcessName + "-3");
+        List<String> feedOutputPaths = Arrays.asList(
+            feedOutputPath,
+            baseTestHDFSDir + "/output-data-2" + MINUTE_DATE_PATTERN,
+            baseTestHDFSDir + "/output-data-3" + MINUTE_DATE_PATTERN
+        );
+
+        //create the second and third processes, each consuming the output feed of the previous one
+        for (int bIndex = 1; bIndex < 3; ++bIndex) {
+            final FeedMerlin outputFeed = new FeedMerlin(bundles[0].getOutputFeedFromBundle());
+            final ProcessMerlin processMerlin = bundles[0].getProcessObject();
+
+            processMerlin.setName(processNames.get(bIndex));
+
+            outputFeed.setDataLocationPath(feedOutputPaths.get(bIndex));
+            outputFeed.setName(outputFeedNames.get(bIndex));
+
+            //rename output feeds before renaming input feeds
+            processMerlin.renameFeeds(Collections.singletonMap(oldOutputFeedName, outputFeedNames.get(bIndex)));
+            processMerlin.renameFeeds(Collections.singletonMap(oldInputFeedName, inputFeedNames.get(bIndex)));
+            AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(outputFeed.toString()));
+            AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(processMerlin.toString()));
+        }
+
+        for (int index = 0; index < 3; ++index) {
+            InstanceUtil.waitTillInstanceReachState(clusterOC, processNames.get(index), 3,
+                CoordinatorAction.Status.WAITING, EntityType.PROCESS, 5);
+        }
+        LOGGER.info(inputFeedNames.get(0) + "(" + inputFeedFrequencies.get(0) + ") -> *" + processNames.get(0) + "* -> "
+            + inputFeedNames.get(1) + "(" + inputFeedFrequencies.get(1) + ") -> *" + processNames.get(1) + "* -> "
+            + inputFeedNames.get(2) + "(" + inputFeedFrequencies.get(2) + ") -> *" + processNames.get(2) + "* -> "
+            + outputFeedNames.get(2));
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void tearDown() {
+        removeTestClassEntities();
+    }
+
+    @Test
+    public void processInstanceDependencyTest() throws Exception {
+        final DateTime startTimeMinus20 = startTime.minusMinutes(20);
+
+        for (int index = 0; index < 3; ++index) {
+            List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
+            final SchedulableEntityInstance outputInstance =
+                new SchedulableEntityInstance(outputFeedNames.get(index), clusterName, startTime.toDate(),
+                    EntityType.FEED);
+            outputInstance.setTags("Output");
+            expectedDependencies.add(outputInstance);
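+            // "Input" dependencies are expected at the input feed's frequency across the
+            // 20-minute window preceding the process instance time.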
+            for (DateTime dt = new DateTime(startTime); !dt.isBefore(startTimeMinus20);
+                 dt = dt.minusMinutes(inputFeedFrequencies.get(index))) {
+                final SchedulableEntityInstance inputInstance =
+                    new SchedulableEntityInstance(inputFeedNames.get(index), clusterName, dt.toDate(), EntityType.FEED);
+                inputInstance.setTags("Input");
+                expectedDependencies.add(inputInstance);
+            }
+            InstanceDependencyResult r = prism.getProcessHelper().getInstanceDependencies(processNames.get(index),
+                "?instanceTime=" + startTimeStr, null);
+
+            List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
+            Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
+            Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
+            Assert.assertEquals(actualDependencies, expectedDependencies,
+                "Unexpected dependencies for process: " + processNames.get(index));
+        }
+    }
+
+    @Test
+    public void inputFeedInstanceDependencyTest() throws Exception {
+        final String inputFeedToTest = inputFeedNames.get(1);
+        final DateTime endTime = TimeUtil.oozieDateToDate(endTimeStr);
+
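+        // inputFeedNames.get(1) is produced by the first process (tagged "Output") and consumed
+        // by every instance of the second process (tagged "Input") within the validity window.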
+        List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
+        final SchedulableEntityInstance outputInstance =
+            new SchedulableEntityInstance(processNames.get(0), clusterName, startTime.toDate(), EntityType.PROCESS);
+        outputInstance.setTags("Output");
+        expectedDependencies.add(outputInstance);
+        final int processFrequency = 5;
+        for (DateTime dt = new DateTime(startTime); !dt.isAfter(endTime); dt = dt.plusMinutes(processFrequency)) {
+            final SchedulableEntityInstance inputInstance =
+                new SchedulableEntityInstance(processNames.get(1), clusterName, dt.toDate(), EntityType.PROCESS);
+            inputInstance.setTags("Input");
+            expectedDependencies.add(inputInstance);
+        }
+        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(inputFeedToTest,
+            "?instanceTime=" + startTimeStr, null);
+
+        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
+        Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
+        Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
+        Assert.assertEquals(actualDependencies, expectedDependencies,
+            "Unexpected dependencies for process: " + inputFeedToTest);
+    }
+
+    @Test
+    public void outputFeedInstanceDependencyTest() throws Exception {
+        final String outputFeedToTest = outputFeedNames.get(1);
+        final DateTime endTime = TimeUtil.oozieDateToDate(endTimeStr);
+
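+        // The middle output feed is produced by the second process (tagged "Output") and consumed
+        // by every instance of the third process (tagged "Input") within the validity window.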
+        List<SchedulableEntityInstance> expectedDependencies = new ArrayList<>();
+        final SchedulableEntityInstance outputInstance =
+            new SchedulableEntityInstance(processNames.get(1), clusterName, startTime.toDate(), EntityType.PROCESS);
+        outputInstance.setTags("Output");
+        expectedDependencies.add(outputInstance);
+        final int processFrequency = 5;
+        for (DateTime dt = new DateTime(startTime); !dt.isAfter(endTime); dt = dt.plusMinutes(processFrequency)) {
+            final SchedulableEntityInstance inputInstance =
+                new SchedulableEntityInstance(processNames.get(2), clusterName, dt.toDate(), EntityType.PROCESS);
+            inputInstance.setTags("Input");
+            expectedDependencies.add(inputInstance);
+        }
+        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(outputFeedToTest,
+            "?instanceTime=" + startTimeStr, null);
+
+        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
+        Collections.sort(expectedDependencies, DEPENDENCY_COMPARATOR);
+        Collections.sort(actualDependencies, DEPENDENCY_COMPARATOR);
+        Assert.assertEquals(actualDependencies, expectedDependencies,
+            "Unexpected dependencies for process: " + outputFeedToTest);
+    }
+
+    /**
+     * Particular check for https://issues.apache.org/jira/browse/FALCON-1317.
+     */
+    @Test
+    public void testInstanceDependencySingleElement()
+        throws URISyntaxException, AuthenticationException, InterruptedException, IOException {
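+        // The terminal feed of the pipeline is produced by the third process but never consumed,
+        // so exactly one dependency instance is expected (regression check for FALCON-1317).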
+        InstanceDependencyResult r = prism.getFeedHelper().getInstanceDependencies(outputFeedNames.get(2),
+            "?instanceTime=" + startTimeStr, null);
+        Assert.assertEquals(r.getStatus(), APIResult.Status.SUCCEEDED, "Request shouldn't fail.");
+        List<SchedulableEntityInstance> actualDependencies = Arrays.asList(r.getDependencies());
+        Assert.assertEquals(actualDependencies.size(), 1, "There should be a single dependency element.");
+    }
+
+    /**
+     * Run triage for different pipeline feeds and processes.
+     * @param bundleInd pipeline stage index (0-2)
+     * @param entityType process or feed
+     */
+    @Test(dataProvider = "getParameters")
+    public void testTriageInstance(int bundleInd, EntityType entityType)
+        throws URISyntaxException, AuthenticationException, InterruptedException, IOException {
+        AbstractEntityHelper helper;
+        String entityName;
+        if (entityType == EntityType.FEED) {
+            helper = prism.getFeedHelper();
+            entityName = outputFeedNames.get(bundleInd);
+        } else {
+            helper = prism.getProcessHelper();
+            entityName = processNames.get(bundleInd);
+        }
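+        // Describe the whole pipeline so that EntityLineageUtil can compute the expected triage graph.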
+        Map<PipelineEntityType, List<String>> entitiesNames = new HashMap<>();
+        entitiesNames.put(PipelineEntityType.PROCESS, processNames);
+        entitiesNames.put(PipelineEntityType.INPUT_FEED, inputFeedNames);
+        entitiesNames.put(PipelineEntityType.OUTPUT_FEED, outputFeedNames);
+        LineageGraphResult expected = EntityLineageUtil.getExpectedResult(bundleInd, entitiesNames,
+            inputFeedFrequencies, entityName, clusterName, startTimeStr);
+        LineageGraphResult actual = helper.getInstanceTriage(entityName,
+            "?start=" + startTimeStr).getTriageGraphs()[0];
+
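+        // Vertex and edge ordering in the triage result is not guaranteed, so sort both sides before comparing.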
+        final List<String> expectedVertices = new ArrayList<>(Arrays.asList(expected.getVertices()));
+        final List<Edge> expectedEdges = new ArrayList<>(Arrays.asList(expected.getEdges()));
+        final List<String> actualVertices = Arrays.asList(actual.getVertices());
+        final List<Edge> actualEdges = Arrays.asList(actual.getEdges());
+        Collections.sort(actualVertices);
+        Collections.sort(expectedVertices);
+        Collections.sort(actualEdges, edgeComparator);
+        Collections.sort(expectedEdges, edgeComparator);
+        Assert.assertEquals(actualVertices, expectedVertices,
+            "Actual vertices & expected vertices in triage graph don't match");
+        Assert.assertEquals(actualEdges, expectedEdges,
+            "Actual edges & expected edges in triage graph don't match");
+    }
+
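+    /**
+     * Pairs of pipeline stage index (0-2) and entity type, covering feed and process triage for every stage.
+     */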
+    @DataProvider
+    public Object[][] getParameters() {
+        return new Object[][]{
+            {0, EntityType.FEED},
+            {0, EntityType.PROCESS},
+            {1, EntityType.FEED},
+            {1, EntityType.PROCESS},
+            {2, EntityType.FEED},
+            {2, EntityType.PROCESS},
+        };
+    }
+
+}

