incubator-hcatalog-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From hashut...@apache.org
Subject svn commit: r1211077 [1/7] - in /incubator/hcatalog/trunk: ./ conf/ src/test/e2e/hcatalog/ src/test/e2e/hcatalog/conf/ src/test/e2e/hcatalog/deployers/ src/test/e2e/hcatalog/drivers/ src/test/e2e/hcatalog/tests/ src/test/e2e/hcatalog/tools/generate/ sr...
Date Tue, 06 Dec 2011 20:05:39 GMT
Author: hashutosh
Date: Tue Dec  6 20:05:37 2011
New Revision: 1211077

URL: http://svn.apache.org/viewvc?rev=1211077&view=rev
Log:
HCATALOG-172: End-to-end test framework for HCatalog

Added:
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHadoop.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHive.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverPig.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hadoop.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hive.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/pig.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/build.xml
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/java/org/apache/hadoop/hive/tools/generate/RCFileGenerator.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/install/install.sh
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/GroupByAge.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadJson.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadRC.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadText.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/ReadWrite.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/SimpleRead.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteJson.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteRC.java
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/udfs/java/org/apache/hcatalog/utils/WriteText.java
Removed:
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/ExistingClusterDeployer.pm
Modified:
    incubator/hcatalog/trunk/CHANGES.txt
    incubator/hcatalog/trunk/conf/proto-hive-site.xml
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/existing_deployer.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/TestDriverHCat.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/drivers/Util.pm
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tests/hcat.conf
    incubator/hcatalog/trunk/src/test/e2e/hcatalog/tools/generate/generate_data.pl

Modified: incubator/hcatalog/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/CHANGES.txt?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/CHANGES.txt (original)
+++ incubator/hcatalog/trunk/CHANGES.txt Tue Dec  6 20:05:37 2011
@@ -50,6 +50,8 @@ Trunk (unreleased changes)
   HCAT-63. RPM package integration with Hadoop (khorgath via hashutosh)
 
   IMPROVEMENTS
+  HCAT-172. End-to-end test framework for HCatalog (daijyc via hashutosh)
+
   HCAT-158. Update HAR support to work with Hadoop 205 (thw via hashutosh)
 
   HCAT-111. Issues with install instructions and release notes in release candidate (gates via hashutosh)

Modified: incubator/hcatalog/trunk/conf/proto-hive-site.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/conf/proto-hive-site.xml?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/conf/proto-hive-site.xml (original)
+++ incubator/hcatalog/trunk/conf/proto-hive-site.xml Tue Dec  6 20:05:37 2011
@@ -57,7 +57,7 @@
 
 <property>
   <name>hive.metastore.sasl.enabled</name>
-    <value>false</value>
+    <value>SASL_ENABLED</value>
     <description>If true, the metastore thrift interface will be secured with SASL. Clients must authenticate with Kerberos.</description>
 </property>
 
@@ -81,7 +81,7 @@
 
 <property>
   <name>hive.metastore.uris</name>
-  <value>thrift://SVRHOST:9933</value>
+  <value>thrift://SVRHOST:PORT</value>
   <description>URI for client to contact metastore server</description>
 </property>
 

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/build.xml Tue Dec  6 20:05:37 2011
@@ -40,6 +40,7 @@
   <property name="udf.jar" value="${udf.java.dir}/testudf.jar"/>
   <property name="params.dir" value="${basedir}/paramfiles"/>
   <property name="lib.dir" value="${basedir}/lib"/>
+  <property name="rctool.java.dir" value="${basedir}/tools/generate/java"/>
 
   <property name="tar.name" value="${basedir}/hcattests.tar"/>
   <property name="tar.dir" value="${basedir}/tar"/>
@@ -56,12 +57,38 @@
   <property name="test.location" value="${basedir}/testdist"/>
   <property name="benchmark.location" value="${test.location}/benchmarks"/>
 
-
   <!-- Build the UDFs -->
   <target name="udfs" >
     <ant dir="${udf.java.dir}"/>
   </target>
 
+  <path id="hadoop.core.jar.location">
+    <fileset dir="${harness.hadoop.home}">
+      <include name="hadoop-core-*.jar"/>
+    </fileset>
+  </path>
+
+  <path id="hive.serde.jar.location">
+    <fileset dir="${hive.dir}/build/serde">
+      <include name="hive-serde-*.jar"/>
+    </fileset>
+  </path>
+
+  <path id="hive.ql.jar.location">
+    <fileset dir="${hive.dir}/build/ql">
+      <include name="hive-exec-*.jar"/>
+    </fileset>
+  </path>
+
+  <!-- Build the RCfile data generator -->
+  <target name="rctool" depends="property-check">
+    <ant dir="${rctool.java.dir}">
+      <property name="hive.serde.jarfile" refid="hive.serde.jar.location"/>
+      <property name="hive.ql.jarfile" refid="hive.ql.jar.location"/>
+      <property name="hadoop.core.jarfile" refid="hadoop.core.jar.location"/>
+    </ant>
+  </target>
+
   <!-- Build an archive to use in the tests -->
   <target name="tar" description="Create tar file with hcat modules">
     <mkdir dir="${tar.dir}"/>
@@ -103,6 +130,7 @@
     <copy todir="${tar.dir}/libexec/HCatTest">
       <fileset dir="${tool.src}/test"/>
       <fileset dir="${tool.src}/generate"/>
+      <fileset dir="${tool.src}/install"/>
     </copy>
 
     <copy todir="${tar.dir}/lib/java">
@@ -123,16 +151,16 @@
 
   <!-- Check that the necessary properties are setup -->
   <target name="property-check">
-     <fail message="Please set the property harness.old.pig to the directory where your old version of Pig is installed"
-      unless="harness.old.pig"/>
-   <fail message="Please set the property harness.cluster.conf to the directory containing hadoop conf "
+    <fail message="Please set the property harness.cluster.conf to the directory containing hadoop conf "
       unless="harness.cluster.conf"/>
-     <fail message="Please set the property hcat.server.url (without thrift:// e.g. myserver:9083)"
-      unless="hcat.server.url"/>
+    <fail message="Please set the property harness.hadoop.home to the path of your hadoop installation"
+      unless="harness.hadoop.home"/>
+    <fail message="Please set the property hive.metastore.uris to the hcat thrift server"
+      unless="hive.metastore.uris"/>
   </target>
 
   <!-- Prep the test area -->
-  <target name="init-test" depends="build-harness">
+  <target name="init-test" depends="build-harness, tar">
     <mkdir dir="${test.location}"/>
     <mkdir dir="${benchmark.location}"/>
 
@@ -156,19 +184,29 @@
       <env key="PH_OUT" value="."/>
       <env key="PH_ROOT" value="."/>
       <env key="HCAT_ROOT" value="${hcat.dir}"/>
+      <env key="HCAT_INSTALL_DIR" value="${hcat.install.dir}"/>
       <env key="HIVE_ROOT" value="${hcat.dir}/hive/external/"/>
       <env key="HCAT_EXTRA_JARS" value="${hcat-deps}/"/>
+      <env key="PIG_CLASSPATH" value="${hcat-deps}/"/>
       <env key="HCAT_JAR" value="${hcat.jar}/"/>
+      <env key="HADOOP_HOME" value="${harness.hadoop.home}/"/>
       <env key="PH_OLDPIG" value="${harness.old.pig}"/>
       <env key="PH_CLUSTER" value="${harness.cluster.conf}"/>
-      <env key="HCAT_URL" value="${hcat.server.url}"/>
+      <env key="HCAT_URL" value="${hive.metastore.uris}"/>
       <env key="METASTORE_PRINCIPAL" value="${metastore.principal}"/>
+      <env key="HIVE_HOME" value="${basedir}/../../../../hive/external/build/dist"/>
+      <env key="PH_CLUSTER_BIN" value="${harness.cluster.bin}"/>
+      <env key="PIG_HOME" value="${harness.pig.home}"/>
+      <env key="PIG_ROOT" value="${harness.pig.home}"/>
       <arg line="${tests.to.run}"/>
+      <arg value="${test.location}/tests/pig.conf"/>
+      <arg value="${test.location}/tests/hive.conf"/>
       <arg value="${test.location}/tests/hcat.conf"/>
+      <arg value="${test.location}/tests/hadoop.conf"/>
     </exec>
   </target>
 
-  <target name="init-deploy">
+  <target name="init-deploy" depends="rctool">
      <!-- For now default to the existing cluster deployer, since 
     it's all there is.  Once the local deployer is available that
     should be the default. -->
@@ -184,21 +222,22 @@
       <env key="PH_LOCAL" value="."/>
       <env key="PH_OUT" value="."/>
       <env key="PH_ROOT" value="."/>
+      <env key="HADOOP_HOME" value="${harness.hadoop.home}/"/>
       <env key="HIVE_ROOT" value="${hcat.dir}/hive/external/"/>
       <env key="HCAT_ROOT" value="${hcat.dir}"/>
+      <env key="HCAT_INSTALL_DIR" value="${hcat.install.dir}"/>
       <env key="HCAT_EXTRA_JARS" value="${hcat-deps}/"/>
       <env key="PH_OLDPIG" value="${harness.old.pig}"/>
       <env key="PH_CLUSTER" value="${harness.cluster.conf}"/>
       <env key="PH_CLUSTER_BIN" value="${harness.cluster.bin}"/>
-
-      <env key="HCAT_URL" value="${hcat.server.url}"/>
+      <env key="HIVE_HOME" value="../../../../hive/external"/>
+      <env key="PH_METASTORE_THRIFT" value="${harness.metastore.thrift}"/>
       <arg value="-deploycfg"/>
       <arg value="${deploy.conf}"/>
       <arg value="${deploy.opt}"/>
       <!-- Give a bogus test so it just does the deployment -->
       <arg value="-t"/>
       <arg value="NoSuchTest"/>
-      <arg value="${test.location}/tests/hcat.conf"/>
     </exec>
   </target>
 
@@ -214,6 +253,35 @@
     </antcall>
   </target>
 
+  <target name="install" depends="init-test">
+    <exec executable="./libexec/HCatTest/install.sh" dir="${test.location}">
+      <arg value="-D"/>
+      <arg value="${mysql.driver.home}"/>
+      <arg value="-d"/>
+      <arg value="${hcat.install.dir}"/>
+      <arg value="-f"/>
+      <arg value="${forrest.home}"/>
+      <arg value="-h"/>
+      <arg value="${harness.hadoop.home}"/>
+      <arg value="-m"/>
+      <arg value="localhost"/>
+      <arg value="-t"/>
+      <arg value="${hcat.tarball}"/>
+      <arg value="-p"/>
+      <arg value="${hcat.port}"/>
+      <arg value="-P"/>
+      <arg value="dbpassword"/>
+      <arg value="-w"/>
+      <arg value="/user/hive/warehouse"/>
+      <arg value="-s"/>
+      <arg value="${metastore.sasl.enabled}"/>
+      <arg value="-k"/>
+      <arg value="${metastore.keytabpath}"/>
+      <arg value="-K"/>
+      <arg value="${metastore.principal}"/>
+    </exec>
+  </target>
+
   <target name="deploy-test" depends="deploy, test"/>
 
   <target name="deploy-test-undeploy" depends="deploy, test, undeploy"/>

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/default.conf Tue Dec  6 20:05:37 2011
@@ -27,8 +27,8 @@ chomp $me;
 
 $cfg = {
     #HDFS
-      'inpathbase'     => '/user/pig/tests/data'
-    , 'outpathbase'    => '/user/pig/out'
+    'inpathbase'     => '/user/hcat/tests/data'
+    , 'outpathbase'    => '/user/hcat/out'
 
    #LOCAL
     , 'localinpathbase'   => "$ENV{PH_LOCAL}/in" 
@@ -39,34 +39,46 @@ $cfg = {
     #TEST
     , 'benchmarkPath'    => "$ENV{PH_OUT}/benchmarks"
     , 'scriptPath'       => "$ENV{PH_ROOT}/libexec"
-    , 'tmpPath'          => '/tmp/pigtest'
-	, 'jythonjar'        => "$ENV{PH_JYTHON_JAR}"
+    , 'tmpPath'          => "/tmp/pigtest"
+    , 'jythonjar'        => "$ENV{PH_JYTHON_JAR}"
+
+    #TESTDB
+    , 'dbuser'         => 'hcattest'
+    , 'dbhost'         => 'localhost'
+    , 'dbpasswd'       => 'hcattest'
+    , 'dbdb'           => 'hcattestdb'
+
+    #COMMON
+    , 'metastore.principal' => "$ENV{METASTORE_PRINCIPAL}"   
+    , 'metastore_thrift' => $ENV{'PH_METASTORE_THRIFT'}
+    , 'thriftserver' => "$ENV{HCAT_URL}"
+
+    #HCAT
+    , 'hcat_data_dir'    => '/user/hcat/tests/data'
+    , 'hivehome'          => $ENV{'PH_HIVE_HOME'}
+    , 'hcathome'          => $ENV{'HCAT_INSTALL_DIR'}
+    , 'hcatalog.jar' => "$ENV{HCAT_JAR},$ENV{HIVE_ROOT}/build/dist/lib/hive-serde-0.9.0-SNAPSHOT.jar,$ENV{HIVE_ROOT}/build/dist/lib/hive-exec-0.9.0-SNAPSHOT.jar,$ENV{PIG_HOME}/pig-withouthadoop.jar,$ENV{HIVE_ROOT}/build/dist/lib/hive-metastore-0.9.0-SNAPSHOT.jar,$ENV{HIVE_ROOT}/build/dist/lib/libfb303-0.7.0.jar,$ENV{HIVE_ROOT}/build/dist/lib/jdo2-api-2.3-ec.jar"
 
     #PIG
     , 'testconfigpath'   => "$ENV{PH_CLUSTER}"
     , 'hadoopbin'   => "$ENV{PH_CLUSTER_BIN}"
     , 'funcjarPath'      => "$ENV{PH_ROOT}/lib/java"
     , 'paramPath'        => "$ENV{PH_ROOT}/paramfiles"
-    , 'pigpath'          => "$ENV{HCAT_ROOT}"
-	, 'oldpigpath'       => "$ENV{PH_OLDPIG}"
-    ,'additionaljars' =>  "$ENV{HCAT_EXTRA_JARS}"
-
-	#HADOOP
-	, 'hadoopHome'       => "$ENV{HCAT_ROOT}/lib"
- ,'hadoop_classpath' =>  "$ENV{HCAT_EXTRA_JARS}"
-     , 'userhomePath' => "$ENV{HOME}"
-    ,'local.bin'     => '/usr/bin'
- 
-    ,'logDir'                => "$ENV{PH_OUT}/log" 
-	,'propertiesFile'     => "./conf/testpropertiesfile.conf"
-	,'harness.console.level' => 'ERROR'
+    , 'pigpath'          => "$ENV{PIG_ROOT}"
+    , 'oldpigpath'       => "$ENV{PH_OLDPIG}"
+    , 'additionaljars' =>  "$ENV{HCAT_ROOT}/build/hcatalog/hcatalog-0.3.0-dev.jar:$ENV{HCAT_ROOT}/hive/external/build/metastore/hive-metastore-0.9.0-SNAPSHOT.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libthrift.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/hive-exec-0.9.0-SNAPSHOT.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/libfb303.jar:$ENV{HCAT_ROOT}/hive/external/build/dist/lib/jdo2-api-2.3-ec.jar:$ENV{'HCAT_INSTALL_DIR'}/etc/hcatalog"
+
+    #HADOOP
+    , 'hadoopHome'       => "$ENV{HCAT_ROOT}/lib"
+    , 'hadoop_classpath' =>  "$ENV{HCAT_EXTRA_JARS}"
+    , 'userhomePath' => "$ENV{HOME}"
+    , 'local.bin'     => '/usr/bin'
+    , 'logDir'                => "$ENV{PH_OUT}/log" 
+    , 'propertiesFile'     => "./conf/testpropertiesfile.conf"
+    , 'harness.console.level' => 'ERROR'
 
-   #HIVE
+    #HIVE
     , 'hive_bin_location' => "$ENV{HIVE_ROOT}/build/dist/bin" 
+    , 'hivehome' => "$ENV{HIVE_HOME}"
 
-    , 'metastore.principal' => "$ENV{METASTORE_PRINCIPAL}"   
-  #HCATALOG
-  ,'thriftserver' => "$ENV{HCAT_URL}"
-     ,'hcatalog.jar' => "$ENV{HCAT_JAR},file://$ENV{HIVE_ROOT}/lib/thrift-fb303-0.5.0.jar,file://$ENV{HIVE_ROOT}/lib/thrift-0.5.0.jar,file://$ENV{HIVE_ROOT}/build/metastore/hive-metastore-0.8.0-SNAPSHOT.jar,file://$ENV{HIVE_ROOT}/build/common/hive-common-0.8.0-SNAPSHOT.jar,file://$ENV{HIVE_ROOT}/build/shims/hive-shims-0.8.0-SNAPSHOT.jar,file://$ENV{HIVE_ROOT}/build/serde/hive-serde-0.8.0-SNAPSHOT.jar,file://$ENV{HIVE_ROOT}/build/ql/hive-exec-0.8.0-SNAPSHOT.jar"
-#,'hcat_bin_location' => "$ENV{HCAT_ROOT}/bin/hcat.sh"
 };

Modified: incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/existing_deployer.conf
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/existing_deployer.conf?rev=1211077&r1=1211076&r2=1211077&view=diff
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/existing_deployer.conf (original)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/conf/existing_deployer.conf Tue Dec  6 20:05:37 2011
@@ -22,10 +22,11 @@
 #
 
 $cfg = {
-	'deployer' => 'ExistingClusterDeployer',
+	'deployer' => 'HCatExistingClusterDeployer',
 	
 	# hadoop values
 	'hadoopdir'   => $ENV{'PH_CLUSTER'},
+        'hcat_data_dir'  => '/user/hcat/test/data',
 
 	# db values
 # 	'dbuser' => 'pigtester',

Added: incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm
URL: http://svn.apache.org/viewvc/incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm?rev=1211077&view=auto
==============================================================================
--- incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm (added)
+++ incubator/hcatalog/trunk/src/test/e2e/hcatalog/deployers/HCatExistingClusterDeployer.pm Tue Dec  6 20:05:37 2011
@@ -0,0 +1,340 @@
+############################################################################           
+#  Licensed to the Apache Software Foundation (ASF) under one or more                  
+#  contributor license agreements.  See the NOTICE file distributed with               
+#  this work for additional information regarding copyright ownership.                 
+#  The ASF licenses this file to You under the Apache License, Version 2.0             
+#  (the "License"); you may not use this file except in compliance with                
+#  the License.  You may obtain a copy of the License at                               
+#                                                                                      
+#      http://www.apache.org/licenses/LICENSE-2.0                                      
+#                                                                                      
+#  Unless required by applicable law or agreed to in writing, software                 
+#  distributed under the License is distributed on an "AS IS" BASIS,                   
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.            
+#  See the License for the specific language governing permissions and                 
+#  limitations under the License.                                                      
+                                                                                       
+package HCatExistingClusterDeployer;
+
+use IPC::Run qw(run);
+use TestDeployer;
+use Util;
+
+use strict;
+use English;
+
+our @ISA = "TestDeployer";
+
+###########################################################################
+# Class: HiveExistingClusterDeployer
+# Deploy the Pig harness to a cluster and database that already exists.
+
+##############################################################################
+# Sub: new
+# Constructor
+#
+# Paramaters:
+# None
+#
+# Returns:
+# None.
+sub new
+{
+    my $proto = shift;
+    my $class = ref($proto) || $proto;
+    my $self = {};
+
+    bless($self, $class);
+
+    return $self;
+}
+
+##############################################################################
+# Sub: checkPrerequisites
+# Check any prerequisites before a deployment is begun.  For example if a 
+# particular deployment required the use of a database system it could
+# check here that the db was installed and accessible.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub checkPrerequisites
+{
+    my ($self, $cfg, $log) = @_;
+
+    if (! defined $ENV{'HADOOP_HOME'} || $ENV{'HADOOP_HOME'} eq "") {
+        print $log "You must set the environment variable HADOOP_HOME";
+        die "HADOOP_HOME not defined";
+    }
+
+    # Run a quick and easy Hadoop command to make sure we can
+    Util::runHadoopCmd($cfg, $log, "fs -ls /");
+
+}
+
+##############################################################################
+# Sub: deploy
+# Deploy any required packages
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deploy
+{
+}
+
+##############################################################################
+# Sub: start
+# Start any software modules that are needed.
+# This is a no-op in this case because we're assuming both the cluster and the
+# database already exist
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub start
+{
+}
+
+##############################################################################
+# Sub: generateData
+# Generate any data needed for this test run.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub generateData
+{
+    my ($self, $cfg, $log) = @_;
+    my @tables = (
+        {
+            'name' => "studenttab10k",
+            'filetype' => "studenttab",
+            'rows' => 10000,
+            'hdfs' => "studenttab10k",
+        }, {
+            'name' => "votertab10k",
+            'filetype' => "votertab",
+            'rows' => 10000,
+            'hdfs' => "votertab10k",
+        }, {
+            'name' => "studentparttab30k",
+            'filetype' => "studentparttab",
+            'rows' => 10000,
+            'hdfs' => "studentparttab30k",
+            'partitions' => ['20110924', '20110925', '20110926']
+        },{
+            'name' => "studentnull10k",
+            'filetype' => "studentnull",
+            'rows' => 10000,
+            'hdfs' => "studentnull10k",
+        },{
+            'name' => "all100k",
+            'filetype' => "allscalars",
+            'rows' => 100000,
+            'hdfs' => "all100k",
+        },{
+            'name' => "all100kjson",
+            'filetype' => "json",
+            'rows' => 100000,
+            'hdfs' => "all100kjson",
+        },{
+            'name' => "all100krc",
+            'filetype' => "studenttab",
+            'rows' => 100000,
+            'hdfs' => "all100krc",
+            'format' => "rc",
+        }
+    );
+
+    
+    if (defined($cfg->{'load_hive_only'}) && $cfg->{'load_hive_only'} == 1) {
+        return $self->hiveMetaOnly($cfg, $log, \@tables);
+    }
+
+    # Create the HDFS directories
+    Util::runHadoopCmd($cfg, $log, "fs -mkdir $cfg->{'hcat_data_dir'}");
+
+    foreach my $table (@tables) {
+        print "Generating data for $table->{'name'}\n";
+        # Generate the data
+        my @cmd;
+        if (defined($table->{'format'})) {
+            @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'},
+                $table->{'name'}, $cfg->{'hcat_data_dir'}, $table->{'format'});
+        } else {
+            @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'},
+                $table->{'name'}, $cfg->{'hcat_data_dir'});
+        }
+        $self->runCmd($log, \@cmd);
+
+        # Copy the data to HDFS
+        my $hadoop = "fs -mkdir $cfg->{'hcat_data_dir'}/$table->{'hdfs'}";
+        Util::runHadoopCmd($cfg, $log, $hadoop);
+
+        if (defined($table->{'partitions'})) {
+            foreach my $part (@{$table->{'partitions'}}) {
+                my $hadoop = "fs -mkdir
+                    $cfg->{'hcat_data_dir'}/$table->{'hdfs'}/$table->{'name'}.$part";
+                Util::runHadoopCmd($cfg, $log, $hadoop);
+                my $hadoop = "fs -copyFromLocal $table->{'name'}.$part " .
+                    "$cfg->{'hcat_data_dir'}/$table->{'hdfs'}/$table->{'name'}.$part/$table->{'name'}.$part";
+                Util::runHadoopCmd($cfg, $log, $hadoop);
+            }
+        } else {
+            my $hadoop = "fs -copyFromLocal $table->{'name'} ".
+                "$cfg->{'hcat_data_dir'}/$table->{'hdfs'}/$table->{'name'}";
+            Util::runHadoopCmd($cfg, $log, $hadoop);
+        }
+
+        print "Loading data into Hive for $table->{'name'}\n";
+        Util::runHCatCmdFromFile($cfg, $log,
+            "./" . $table->{'name'} .  ".hcat.sql");
+
+        print "Loading data into MySQL for $table->{'name'}\n";
+        Util::runDbCmd($cfg, $log, $table->{'name'} . ".mysql.sql");
+    }
+
+}
+
+###########################################################################
+# Sub: hiveMetaOnly                                                        
+# Load metadata into Hive, but don't load Mysql or HDFS, as we assume      
+# these have already been loaded.                                          
+#                                                                          
+# Paramaters:                                                              
+# cfg - hash from config file, including deployment config                 
+# log - log file handle                                                    
+#                                                                          
+# Returns:                                                                 
+# None                                                                     
+#                                                                          
+sub hiveMetaOnly
+{
+    my ($self, $cfg, $log, $tables) = @_;
+    foreach my $table (@{$tables}) {
+        print "Generating data for $table->{'name'}\n";
+        # Generate the data
+        my @cmd = ($cfg->{'gentool'}, $table->{'filetype'}, $table->{'rows'},
+            $table->{'name'}, $cfg->{'hcat_data_dir'});
+        $self->runCmd($log, \@cmd);
+
+        print "Loading data into Hive for $table->{'name'}\n";
+        Util::runHCatCmdFromFile($cfg, $log, "./" . $table->{'name'} .
+             ".hive.sql");
+    }
+}
+
+##############################################################################
+# Sub: confirmDeployment
+# Run checks to confirm that the deployment was successful.  When this is 
+# done the testing environment should be ready to run.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is 
+# an issue.
+#
+sub confirmDeployment
+{
+}
+
+##############################################################################
+# Sub: deleteData
+# Remove any data created that will not be removed by undeploying.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub deleteData
+{
+}
+
+##############################################################################
+# Sub: stop
+# Stop any servers or systems that are no longer needed once testing is
+# completed.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub stop
+{
+}
+
+##############################################################################
+# Sub: undeploy
+# Remove any packages that were installed as part of the deployment.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# None
+#
+sub undeploy
+{
+}
+
+##############################################################################
+# Sub: confirmUndeployment
+# Run checks to confirm that the undeployment was successful.  When this is 
+# done anything that must be turned off or removed should be turned off or
+# removed.
+#
+# Paramaters:
+# globalHash - hash from config file, including deployment config
+# log - log file handle
+#
+# Returns:
+# Nothing
+# This method should die with an appropriate error message if there is 
+# an issue.
+#
+sub confirmUndeployment
+{
+    die "$0 INFO : confirmUndeployment is a virtual function!";
+}
+
+sub runCmd($$$)
+{
+    my ($self, $log, $cmd) = @_;
+
+    print $log "Going to run [" . join(" ", @$cmd) . "]\n";
+
+    run($cmd, \undef, $log, $log) or
+        die "Failed running " . join(" ", @$cmd) . "\n";
+}
+
+1;



Mime
View raw message