falcon-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rostafiyc...@apache.org
Subject falcon git commit: FALCON-671 Add a test in falcon-regression where one oozie workflow has various actions like mr, hive, pig. Contributed by Karishma Gulati
Date Thu, 12 Feb 2015 12:28:06 GMT
Repository: falcon
Updated Branches:
  refs/heads/master 86fe67f53 -> fd10aa410


FALCON-671 Add a test in falcon-regression where one oozie workflow has various actions like
mr, hive, pig. Contributed by Karishma Gulati


Project: http://git-wip-us.apache.org/repos/asf/falcon/repo
Commit: http://git-wip-us.apache.org/repos/asf/falcon/commit/fd10aa41
Tree: http://git-wip-us.apache.org/repos/asf/falcon/tree/fd10aa41
Diff: http://git-wip-us.apache.org/repos/asf/falcon/diff/fd10aa41

Branch: refs/heads/master
Commit: fd10aa4106a2706b8e1f61dd17291338eec8f722
Parents: 86fe67f
Author: Ruslan Ostafiychuk <rostafiychuk@apache.org>
Authored: Thu Feb 12 14:26:59 2015 +0200
Committer: Ruslan Ostafiychuk <rostafiychuk@apache.org>
Committed: Thu Feb 12 14:26:59 2015 +0200

----------------------------------------------------------------------
 falcon-regression/CHANGES.txt                   |   3 +
 .../falcon/regression/core/util/BundleUtil.java |   4 +
 .../falcon/regression/core/util/HCatUtil.java   |  22 ++
 .../falcon/regression/core/util/OSUtil.java     |   2 +
 .../falcon/regression/CombinedActionsTest.java  | 210 +++++++++++++++++++
 .../resources/combinedActions/cluster-0.1.xml   |  48 +++++
 .../combinedActions/feed-template1.xml          |  51 +++++
 .../combinedActions/feed-template1_hcat.xml     |  41 ++++
 .../combinedActions/feed-template2.xml          |  51 +++++
 .../combinedActions/feed-template2_hcat.xml     |  47 +++++
 .../combinedActions/feed-template3.xml          |  51 +++++
 .../resources/combinedActions/process-agg.xml   |  51 +++++
 .../src/test/resources/combinedWorkflow/id.pig  |  20 ++
 .../test/resources/combinedWorkflow/script.hql  |  19 ++
 .../resources/combinedWorkflow/workflow.xml     | 110 ++++++++++
 15 files changed, 730 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/CHANGES.txt
----------------------------------------------------------------------
diff --git a/falcon-regression/CHANGES.txt b/falcon-regression/CHANGES.txt
index 6351efd..a88fb85 100644
--- a/falcon-regression/CHANGES.txt
+++ b/falcon-regression/CHANGES.txt
@@ -6,6 +6,9 @@ Trunk (Unreleased)
 
   NEW FEATURES
 
+   FALCON-671 Add a test in falcon-regression where one oozie workflow has various actions like
+   mr, hive, pig (Karishma Gulati via Ruslan Ostafiychuk)
+
    FALCON-985 Upgrading jsch version in falcon regression pom (Pragya M via Samarth)
 
    FALCON-884 Add option to dump xmls generated by falcon (Raghav Kumar Gautam via Ruslan

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java
index 6e25a60..4218815 100644
--- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java
+++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/BundleUtil.java
@@ -83,6 +83,10 @@ public final class BundleUtil {
         return readBundleFromFolder("updateBundle");
     }
 
+    public static Bundle readCombinedActionsBundle() throws IOException {
+        return readBundleFromFolder("combinedActions");
+    }
+
     private static Bundle readBundleFromFolder(final String folderPath) throws IOException {
         LOGGER.info("Loading xmls from directory: " + folderPath);
         File directory = null;

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java
index d878ecb..3ac0e6a 100644
--- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java
+++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/HCatUtil.java
@@ -21,10 +21,17 @@ package org.apache.falcon.regression.core.util;
 
 import org.apache.falcon.regression.core.enumsAndConstants.MerlinConstants;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hive.hcatalog.api.HCatAddPartitionDesc;
 import org.apache.hive.hcatalog.api.HCatClient;
 import org.apache.hive.hcatalog.cli.SemanticAnalysis.HCatSemanticAnalyzer;
 import org.apache.hive.hcatalog.common.HCatException;
 import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.testng.Assert;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
 /**
  * util methods for HCat.
@@ -50,6 +57,21 @@ public final class HCatUtil {
         return HCatClient.create(hcatConf);
     }
 
+    public static void addPartitionsToTable(HCatClient clusterHC, List<String> partitions,
+        List<String> partitionLocations, String partitionCol, String dbName, String tableName) throws HCatException {
+        Assert.assertEquals(partitions.size(), partitionLocations.size(),
+                "Number of locations is not same as number of partitions.");
+        final List<HCatAddPartitionDesc> partitionDesc = new ArrayList<HCatAddPartitionDesc>();
+        for (int i = 0; i < partitions.size(); ++i) {
+            final String partition = partitions.get(i);
+            final Map<String, String> onePartition = new HashMap<String, String>();
+            onePartition.put(partitionCol, partition);
+            final String partitionLoc = partitionLocations.get(i);
+            partitionDesc.add(HCatAddPartitionDesc.create(dbName, tableName, partitionLoc, onePartition).build());
+        }
+        clusterHC.addPartitions(partitionDesc);
+    }
+
     @SuppressWarnings("deprecation")
     public static HCatFieldSchema getStringSchema(String fieldName, String comment) throws HCatException {
         return new HCatFieldSchema(fieldName, HCatFieldSchema.Type.STRING, comment);

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java
index ed29d07..dd5e194 100644
--- a/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java
+++ b/falcon-regression/merlin-core/src/main/java/org/apache/falcon/regression/core/util/OSUtil.java
@@ -44,6 +44,8 @@ public final class OSUtil {
         String.format(OOZIE_EXAMPLE_INPUT_DATA + "normalInput%s", SEPARATOR);
     public static final String SINGLE_FILE =
         String.format(OOZIE_EXAMPLE_INPUT_DATA + "SingleFile%s", SEPARATOR);
+    public static final String OOZIE_COMBINED_ACTIONS =
+            String.format(RESOURCES + "combinedWorkflow%s", SEPARATOR);
 
     public static String getPath(String... pathParts) {
         return StringUtils.join(pathParts, OSUtil.SEPARATOR);

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java
new file mode 100644
index 0000000..a0e1927
--- /dev/null
+++ b/falcon-regression/merlin/src/test/java/org/apache/falcon/regression/CombinedActionsTest.java
@@ -0,0 +1,210 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.falcon.regression;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.falcon.entity.v0.EntityType;
+import org.apache.falcon.entity.v0.Frequency;
+import org.apache.falcon.entity.v0.feed.LocationType;
+import org.apache.falcon.regression.Entities.FeedMerlin;
+import org.apache.falcon.regression.core.bundle.Bundle;
+import org.apache.falcon.regression.core.helpers.ColoHelper;
+import org.apache.falcon.regression.core.util.*;
+import org.apache.falcon.regression.testHelper.BaseTestClass;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hive.hcatalog.api.HCatClient;
+import org.apache.hive.hcatalog.api.HCatCreateTableDesc;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.log4j.Logger;
+import org.apache.oozie.client.CoordinatorAction;
+import org.apache.oozie.client.OozieClient;
+import org.joda.time.format.DateTimeFormat;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.lang.reflect.Method;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Test where a single workflow contains multiple actions.
+ */
+
+public class CombinedActionsTest extends BaseTestClass {
+
+    private ColoHelper cluster = servers.get(0);
+    private FileSystem clusterFS = serverFS.get(0);
+    private OozieClient clusterOC = serverOC.get(0);
+    private HCatClient clusterHC;
+
+    private final String hiveTestDir = "/HiveData";
+    private final String baseTestHDFSDir = cleanAndGetTestDir() + hiveTestDir;
+    private final String inputHDFSDir = baseTestHDFSDir + "/input";
+    private final String outputHDFSDir = baseTestHDFSDir + "/output";
+    private String aggregateWorkflowDir = cleanAndGetTestDir() + "/aggregator";
+    private static final Logger LOGGER = Logger.getLogger(CombinedActionsTest.class);
+    private static final String HCATDIR = OSUtil.getPath("src", "test", "resources", "hcat");
+    private static final String LOCALHCATDATA = OSUtil.getPath(HCATDIR, "data");
+    public static final String DBNAME = "default";
+    public static final String COL1NAME = "id";
+    public static final String COL2NAME = "value";
+    public static final String PARTITIONCOLUMN = "dt";
+    private final String inputTableName = "combinedactionstest_input_table";
+    private final String outputTableName = "combinedactionstest_output_table";
+
+    private String pigMrTestDir = cleanAndGetTestDir() + "/pigMrData";
+    private String inputPath = pigMrTestDir + "/input/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";
+    private String outputPathPig = pigMrTestDir + "/output/pig/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";
+    private String outputPathMr = pigMrTestDir + "/output/mr/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}";
+
+    @BeforeClass(alwaysRun = true)
+    public void uploadWorkflow() throws Exception {
+
+        HadoopUtil.uploadDir(clusterFS, aggregateWorkflowDir, OSUtil.OOZIE_COMBINED_ACTIONS);
+    }
+
+    @BeforeMethod(alwaysRun = true)
+    public void setUp(Method method) throws Exception {
+        LOGGER.info("test name: " + method.getName());
+        clusterHC = cluster.getClusterHelper().getHCatClient();
+        bundles[0] = BundleUtil.readCombinedActionsBundle();
+        bundles[0] = new Bundle(bundles[0], cluster);
+        bundles[0].generateUniqueBundle(this);
+        AssertUtil.assertSucceeded(prism.getClusterHelper().submitEntity(bundles[0].getClusters().get(0)));
+    }
+
+    @AfterMethod(alwaysRun = true)
+    public void tearDown()throws Exception {
+        removeTestClassEntities();
+        clusterHC.dropTable(DBNAME, inputTableName, true);
+        clusterHC.dropTable(DBNAME, outputTableName, true);
+        HadoopUtil.deleteDirIfExists(pigMrTestDir, clusterFS);
+    }
+
+    /**
+     *Schedule a process, for which the oozie workflow contains multiple actions like hive, mr, pig
+     *The process should succeed. Fails right now due to: https://issues.apache.org/jira/browse/FALCON-670
+     *
+     * @throws Exception
+    */
+
+    @Test
+    public void combinedMrPigHiveAction()throws Exception{
+
+        //create data for pig, mr and hcat jobs
+        final String startDate = "2010-01-01T20:00Z";
+        final String endDate = "2010-01-02T04:00Z";
+
+        String inputFeedMrPig = bundles[0].getFeed("sampleFeed1");
+        FeedMerlin feedObj = new FeedMerlin(inputFeedMrPig);
+
+        HadoopUtil.deleteDirIfExists(pigMrTestDir + "/input", clusterFS);
+        List<String> dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 20);
+
+        HadoopUtil.flattenAndPutDataInFolder(clusterFS, OSUtil.NORMAL_INPUT, pigMrTestDir + "/input", dataDates);
+
+        final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm"}, "-");
+        dataDates = TimeUtil.getMinuteDatesOnEitherSide(startDate, endDate, 60, DateTimeFormat.forPattern(datePattern));
+
+        final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, LOCALHCATDATA,
+                inputHDFSDir, dataDates);
+
+        ArrayList<HCatFieldSchema> cols = new ArrayList<HCatFieldSchema>();
+        cols.add(HCatUtil.getStringSchema(COL1NAME, COL1NAME + " comment"));
+        cols.add(HCatUtil.getStringSchema(COL2NAME, COL2NAME + " comment"));
+        ArrayList<HCatFieldSchema> partitionCols = new ArrayList<HCatFieldSchema>();
+
+        partitionCols.add(HCatUtil.getStringSchema(PARTITIONCOLUMN, PARTITIONCOLUMN + " partition"));
+        clusterHC.createTable(HCatCreateTableDesc
+                .create(DBNAME, inputTableName, cols)
+                .partCols(partitionCols)
+                .ifNotExists(true)
+                .isTableExternal(true)
+                .location(inputHDFSDir)
+                .build());
+
+        clusterHC.createTable(HCatCreateTableDesc
+                .create(DBNAME, outputTableName, cols)
+                .partCols(partitionCols)
+                .ifNotExists(true)
+                .isTableExternal(true)
+                .location(outputHDFSDir)
+                .build());
+
+        HCatUtil.addPartitionsToTable(clusterHC, dataDates, dataset, "dt", DBNAME, inputTableName);
+
+        final String tableUriPartitionFragment = StringUtils.join(
+            new String[]{"#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}"}, "-");
+        String inputTableUri =
+            "catalog:" + DBNAME + ":" + inputTableName + tableUriPartitionFragment;
+        String outputTableUri =
+            "catalog:" + DBNAME + ":" + outputTableName + tableUriPartitionFragment;
+
+        //Set input and output feeds for bundle
+        //input feed for both mr and pig jobs
+        feedObj.setLocation(LocationType.DATA, inputPath);
+        LOGGER.info(feedObj.toString());
+        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));
+
+        //output feed for pig jobs
+        String outputFeedPig = bundles[0].getFeed("sampleFeed2");
+        feedObj = new FeedMerlin(outputFeedPig);
+        feedObj.setLocation(LocationType.DATA, outputPathPig);
+        feedObj.setFrequency(new Frequency("5", Frequency.TimeUnit.minutes));
+        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));
+
+        //output feed for mr jobs
+        String outputFeedMr = bundles[0].getFeed("sampleFeed3");
+        feedObj = new FeedMerlin(outputFeedMr);
+        feedObj.setLocation(LocationType.DATA, outputPathMr);
+        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));
+
+        //input feed for hcat jobs
+        String inputHive = bundles[0].getFeed("sampleFeedHCat1");
+        feedObj = new FeedMerlin(inputHive);
+        feedObj.getTable().setUri(inputTableUri);
+        feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours));
+        feedObj.getClusters().getClusters().get(0).getValidity()
+            .setStart(TimeUtil.oozieDateToDate(startDate).toDate());
+        feedObj.getClusters().getClusters().get(0).getValidity()
+            .setEnd(TimeUtil.oozieDateToDate(endDate).toDate());
+        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));
+
+        //output feed for hcat jobs
+        String outputHive = bundles[0].getFeed("sampleFeedHCat2");
+        feedObj = new FeedMerlin(outputHive);
+        feedObj.getTable().setUri(outputTableUri);
+        feedObj.setFrequency(new Frequency("1", Frequency.TimeUnit.hours));
+        feedObj.getClusters().getClusters().get(0).getValidity()
+            .setStart(TimeUtil.oozieDateToDate(startDate).toDate());
+        feedObj.getClusters().getClusters().get(0).getValidity()
+            .setEnd(TimeUtil.oozieDateToDate(endDate).toDate());
+        AssertUtil.assertSucceeded(prism.getFeedHelper().submitEntity(feedObj.toString()));
+
+        bundles[0].setProcessWorkflow(aggregateWorkflowDir);
+        bundles[0].setProcessValidity(startDate, endDate);
+        bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
+        bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
+        AssertUtil.assertSucceeded(prism.getProcessHelper().submitAndSchedule(bundles[0].getProcessData()));
+        InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(),
+            1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);
+    }
+}

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml b/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml
new file mode 100644
index 0000000..40394e7
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/cluster-0.1.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<cluster colo="ua1" description="" name="corp" xmlns="uri:falcon:cluster:0.1"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+	<interfaces>
+		<interface type="readonly" endpoint="http://host:50070"
+			version="0.20.2" />
+		<interface type="write" endpoint="hdfs://host:54310"
+			version="0.20.2" />
+		<interface type="execute" endpoint="hdfs://host:54311"
+			version="0.20.2" />
+		<interface type="workflow"
+			endpoint="http://host:11000/oozie/" version="3.1" />
+		<interface type="messaging"
+			endpoint="tcp://host:61616?daemon=true"
+			version="5.1.6" />
+		<interface type="registry" endpoint="thrift://host:14003"
+			version="0.11.0" />
+
+	</interfaces>
+	<locations>
+		<location name="staging" path="/projects/falcon/staging" />
+		<location name="temp" path="/tmp" />
+		<location name="working" path="/projectsTest/falcon/working" />
+	</locations>
+	<properties>
+        <property name="hive.metastore.client.socket.timeout" value="120"/>
+		<property name="field1" value="value1" />
+		<property name="field2" value="value2" />
+	</properties>
+</cluster>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml
new file mode 100644
index 0000000..e1eb173
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="sampleFeed1" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+   <partitions>
+        <partition name="country" />
+        <partition name="colo" />
+    </partitions>
+<!--
+    <groups>online,bi</groups>-->
+
+    <frequency>minutes(20)</frequency>
+    <timezone>UTC</timezone>
+    <late-arrival cut-off="hours(6)" />
+
+    <clusters>
+        <cluster name="corp" type="source">
+            <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z"
+     />
+            <retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/test/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" />
+        <location type="stats" path="/projects/falcon/clicksStats" />
+        <location type="meta" path="/projects/falcon/clicksMetaData" />
+    </locations>
+    <ACL owner="testuser" group="group" permission="0x755" />
+    <schema location="/schema/clicks" provider="protobuf" />
+
+    <properties>
+        <property name="field1" value="value1" />
+        <property name="field2" value="value2" />
+    </properties>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml
new file mode 100644
index 0000000..e85a2e4
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template1_hcat.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="sampleFeedHCat1" xmlns="uri:falcon:feed:0.1"
+	xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+	<frequency>minutes(20)</frequency>
+	<timezone>UTC</timezone>
+	<late-arrival cut-off="hours(6)" />
+
+	<clusters>
+		<cluster name="corp" type="source">
+			<validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z" />
+			<retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+		</cluster>
+	</clusters>
+
+	<table
+		uri="catalog:default:mytablepart3#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" />
+    <ACL owner="testuser" group="group" permission="0x755" />
+	<schema location="hcat" provider="hcat" />
+
+	<properties>
+		<property name="field1" value="value1" />
+		<property name="field2" value="value2" />
+	</properties>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml
new file mode 100644
index 0000000..0d40de6
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="sampleFeed2" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!--   <partitions>
+        <partition name="fraud" />
+        <partition name="good" />
+    </partitions>
+
+    <groups>online,bi</groups>-->
+
+    <frequency>hours(1)</frequency>
+<timezone>UTC</timezone>
+    <late-arrival cut-off="hours(6)" />
+
+    <clusters>
+        <cluster name="corp" type="source">
+            <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z"
+           />
+            <retention limit="hours(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/examples/output-data/aggregator/aggregatedLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" />
+        <location type="stats" path="/projects/falcon/clicksStats" />
+        <location type="meta" path="/projects/falcon/clicksMetaData" />
+    </locations>
+    <ACL owner="testuser" group="group" permission="0x755" />
+    <schema location="/schema/clicks" provider="protobuf" />
+
+    <properties>
+        <property name="field1" value="value1" />
+        <property name="field2" value="value2" />
+    </properties>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml
new file mode 100644
index 0000000..cdc396c
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template2_hcat.xml
@@ -0,0 +1,47 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="sampleFeedHCat2" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+<!--   <partitions>
+        <partition name="fraud" />
+        <partition name="good" />
+    </partitions>
+
+    <groups>online,bi</groups>-->
+
+    <frequency>hours(1)</frequency>
+<timezone>UTC</timezone>
+    <late-arrival cut-off="hours(6)" />
+
+    <clusters>
+        <cluster name="corp" type="source">
+            <validity start="2009-02-01T01:00Z" end="2099-05-01T00:00Z"
+           />
+            <retention limit="hours(6)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+        </cluster>
+    </clusters>
+
+   <table uri="catalog:default:output_table#year=${YEAR};month=${MONTH};day=${DAY};hour=${HOUR}" />
+    <ACL owner="testuser" group="group" permission="0x755" />
+    <schema location="hcat" provider="hcat" />
+
+    <properties>
+        <property name="field1" value="value1" />
+        <property name="field2" value="value2" />
+    </properties>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml
new file mode 100644
index 0000000..d0698d5
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/feed-template3.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+<feed description="clicks log" name="sampleFeed3" xmlns="uri:falcon:feed:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+   <partitions>
+        <partition name="country" />
+        <partition name="colo" />
+    </partitions>
+<!--
+    <groups>online,bi</groups>-->
+
+    <frequency>minutes(20)</frequency>
+    <timezone>UTC</timezone>
+    <late-arrival cut-off="hours(6)" />
+
+    <clusters>
+        <cluster name="corp" type="source">
+            <validity start="2009-02-01T00:00Z" end="2099-05-01T00:00Z"
+     />
+            <retention limit="months(9000)" action="delete" /> <!-- Limit can be in Time or Instances 100, Action ENUM DELETE,ARCHIVE -->
+        </cluster>
+    </clusters>
+
+    <locations>
+        <location type="data" path="/test/input-data/rawLogs/${YEAR}/${MONTH}/${DAY}/${HOUR}/${MINUTE}" />
+        <location type="stats" path="/projects/falcon/clicksStats" />
+        <location type="meta" path="/projects/falcon/clicksMetaData" />
+    </locations>
+    <ACL owner="testuser" group="group" permission="0x755" />
+    <schema location="/schema/clicks" provider="protobuf" />
+
+    <properties>
+        <property name="field1" value="value1" />
+        <property name="field2" value="value2" />
+    </properties>
+</feed>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml b/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml
new file mode 100644
index 0000000..25abcd9
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedActions/process-agg.xml
@@ -0,0 +1,51 @@
+<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<process name="aggregator-coord16" xmlns="uri:falcon:process:0.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+    <clusters>
+        <cluster name="corp">
+            <validity end="2011-01-03T03:00Z" start="2010-01-02T01:00Z" />
+        </cluster>
+    </clusters>
+	<parallel>1</parallel>
+	<order>FIFO</order>
+       <frequency>minutes(5)</frequency>
+	<timezone>UTC</timezone>
+	<inputs>
+		<input end="now(0,0)" start="now(0,0)"
+			feed="sampleFeedHCat1" name="inputData"/>
+        <input end="now(0,0)" start="now(0,-20)"
+               feed="sampleFeed1" name="INPUT"/>
+	</inputs>
+	<outputs>
+		<output instance="now(0,0)" feed="sampleFeedHCat2"
+			name="outputData"/>
+        <output instance="now(0,0)" feed="sampleFeed2"
+            name="OUTPUT" />
+        <output instance="now(0,0)" feed="sampleFeed3"
+            name="OUTPUTMR" />
+    </outputs>
+	<properties>
+	   <property name="queueName" value="default"/>
+       <property name="fileTime" value="${formatTime(dateOffset(instanceTime(), 1, 'DAY'), 'yyyy-MMM-dd')}"/>
+       <property name="user" value="${user()}"/>
+	</properties>
+	<workflow path="/examples/apps/aggregator"/>
+	<retry policy="periodic" delay="minutes(3)" attempts="3" />
+</process>

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig b/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig
new file mode 100644
index 0000000..11f227e
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/id.pig
@@ -0,0 +1,20 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+A = load '$INPUT' using PigStorage(':');
+B = foreach A generate $0 as id;
+store B into '$OUTPUT' USING PigStorage();

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql b/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql
new file mode 100644
index 0000000..203633a
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/script.hql
@@ -0,0 +1,19 @@
+--
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements.  See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership.  The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License.  You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+
+INSERT OVERWRITE TABLE ${falcon_outputData_database}.${falcon_outputData_table} PARTITION ${falcon_inputData_partition_filter_hive} SELECT id, value FROM ${falcon_inputData_database}.${falcon_inputData_table} WHERE ${falcon_inputData_partition_filter_hive};

http://git-wip-us.apache.org/repos/asf/falcon/blob/fd10aa41/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml
----------------------------------------------------------------------
diff --git a/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml b/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml
new file mode 100644
index 0000000..a80719a
--- /dev/null
+++ b/falcon-regression/merlin/src/test/resources/combinedWorkflow/workflow.xml
@@ -0,0 +1,110 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<workflow-app xmlns="uri:oozie:workflow:0.2" name="aggregator-wf">
+    <start to="aggregator"/>
+    <action name="aggregator">
+        <map-reduce>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <prepare>
+                <delete path="${OUTPUTMR}"/>
+            </prepare>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+                <property>
+                    <name>mapred.mapper.class</name>
+                    <value>org.apache.hadoop.mapred.lib.IdentityMapper</value>
+                </property>
+                <property>
+                    <name>mapred.reducer.class</name>
+                    <value>org.apache.hadoop.mapred.lib.IdentityReducer</value>
+                </property>
+                <property>
+                    <name>mapred.map.tasks</name>
+                    <value>1</value>
+                </property>
+                <property>
+                    <name>mapred.input.dir</name>
+                    <value>${INPUT}</value>
+                </property>
+                <property>
+                    <name>mapred.output.dir</name>
+                    <value>${OUTPUTMR}</value>
+                </property>
+            </configuration>
+        </map-reduce>
+        <ok to="pigAction"/>
+        <error to="failMapRed"/>
+    </action>
+
+    <action name="pigAction">
+        <pig>
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <script>id.pig</script>
+            <param>INPUT=${INPUT}</param>
+            <param>OUTPUT=${OUTPUT}</param>
+        </pig>
+        <ok to="hiveAction"/>
+        <error to="failPig"/>
+    </action>
+
+    <action name="hiveAction">
+        <hive xmlns="uri:oozie:hive-action:0.2">
+            <job-tracker>${jobTracker}</job-tracker>
+            <name-node>${nameNode}</name-node>
+            <job-xml>${wf:appPath()}/conf/hive-site.xml</job-xml>
+            <configuration>
+                <property>
+                    <name>mapred.job.queue.name</name>
+                    <value>${queueName}</value>
+                </property>
+                <property>
+                    <name>oozie.launcher.mapred.job.priority</name>
+                    <value>${jobPriority}</value>
+                </property>
+                <property>
+                    <name>queueName</name>
+                    <value>default</value>
+                </property>
+            </configuration>
+            <script>script.hql</script>
+            <param>falcon_inputData_database=${falcon_inputData_database}</param>
+            <param>falcon_outputData_database=${falcon_outputData_database}</param>
+            <param>falcon_inputData_table=${falcon_inputData_table}</param>
+            <param>falcon_outputData_table=${falcon_outputData_table}</param>
+            <param>falcon_inputData_partition_filter_hive=${falcon_inputData_partition_filter_hive}</param>
+        </hive>
+
+        <ok to="end"/>
+        <error to="failHive"/>
+    </action>
+
+    <kill name="failMapRed">
+        <message>Map/Reduce failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <kill name="failHive">
+        <message>Hive action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <kill name="failPig">
+        <message>Pig action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]</message>
+    </kill>
+    <end name="end"/>
+</workflow-app>
+- 


Mime
View raw message