incubator-bigtop-commits mailing list archives

From: r..@apache.org
Subject: svn commit: r1326670 [15/47] - in /incubator/bigtop/trunk: ./ bigtop-deploy/puppet/manifests/ bigtop-deploy/puppet/modules/hadoop-hbase/manifests/ bigtop-deploy/puppet/modules/hadoop-hbase/templates/ bigtop-deploy/puppet/modules/hadoop-oozie/manifests/...
Date: Mon, 16 Apr 2012 16:10:32 GMT
Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/oozie.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/oozie.xml?rev=1326670&r1=1326669&r2=1326670&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/oozie.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/oozie.xml Mon Apr 16 16:10:22 2012
@@ -91,12 +91,5 @@
     <file name='/etc' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
     <file name='/etc/init.d' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
     <config name='/etc/init.d/oozie' owners='-1' perm='-rwxr-xr-x' user='root' group='root' />
-    <file name='/etc/oozie' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
-    <file name='/etc/oozie/conf.dist' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
-    <config name='/etc/oozie/conf.dist/oozie-default.xml' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <config name='/etc/oozie/conf.dist/oozie-env.sh' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <config name='/etc/oozie/conf.dist/oozie-site.xml' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <config name='/etc/oozie/conf.dist/adminusers.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <config name='/etc/oozie/conf.dist/oozie-log4j.properties' owners='-1' perm='-rw-r--r--' user='root' group='root' />
   </content>
 </oozie>
\ No newline at end of file

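Each <file>/<config> entry in these manifests records an absolute path, an ls -l style perm string, and the expected owning user and group. As a rough illustration only (a hypothetical helper, not the Bigtop package-test code itself), such an entry could be checked against an installed system like this:

import grp
import os
import pwd
import stat

def check_manifest_entry(name, perm, user, group):
    """Return a list of mismatches between a manifest entry and the local filesystem."""
    st = os.lstat(name)
    problems = []
    actual_perm = stat.filemode(st.st_mode)        # e.g. '-rwxr-xr-x' or 'drwxr-xr-x'
    if actual_perm != perm:
        problems.append('perm %s != %s' % (actual_perm, perm))
    actual_user = pwd.getpwuid(st.st_uid).pw_name
    if actual_user != user:
        problems.append('user %s != %s' % (actual_user, user))
    actual_group = grp.getgrgid(st.st_gid).gr_name
    if actual_group != group:
        problems.append('group %s != %s' % (actual_group, group))
    return problems

# Hypothetical usage against the entry kept in the hunk above:
print(check_manifest_entry('/etc/init.d/oozie', '-rwxr-xr-x', 'root', 'root'))

On a host where the package is installed, the call should print an empty list if the retained /etc/init.d/oozie entry still matches the filesystem.
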
Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml?rev=1326670&r1=1326669&r2=1326670&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/package_data.xml Mon Apr 16 16:10:22 2012
@@ -16,799 +16,83 @@
    limitations under the License.
 -->
 <packages>
-<mahout>
-  <metadata>
-    <summary>A set of Java libraries for scalable machine learning.</summary>
-    <description>Mahout's goal is to build scalable machine learning libraries.
- With scalable we mean:
- .
- Scalable to reasonably large data sets. Our core algorithms for clustering,
- classification and batch based collaborative filtering are implemented on top of
- Apache Hadoop using the map/reduce paradigm. However we do not restrict
- contributions to Hadoop based implementations: Contributions that run on a
- single node or on a non-Hadoop cluster are welcome as well. The core libraries
- are highly optimized to allow for good performance also for non-distributed
- algorithms.
- Scalable to support your business case. Mahout is distributed under a
- commercially friendly Apache Software license.
- Scalable community. The goal of Mahout is to build a vibrant, responsive,
- diverse community to facilitate discussions not only on the project itself but
- also on potential use cases. Come to the mailing lists to find out more.</description>
-    <url>http://mahout.apache.org</url>
-  </metadata>
-  <deps>
-    <bigtop-utils/>
-    <hadoop/>
-  </deps>
-  <alternatives>
-    <mahout-conf>
-      <status>auto</status>
-      <link>/etc/mahout/conf</link>
-      <value>/etc/mahout/conf.dist</value>
-      <alt>/etc/mahout/conf.dist</alt>
-    </mahout-conf>
-  </alternatives>
-</mahout>
-<whirr>
-  <metadata>
-    <summary>Scripts and libraries for running software services on cloud infrastructure</summary>
-    <description>Whirr provides
- .
-  * A cloud-neutral way to run services. You don't have to worry about the
-    idiosyncrasies of each provider.
-  * A common service API. The details of provisioning are particular to the
-    service.
-  * Smart defaults for services. You can get a properly configured system
-    running quickly, while still being able to override settings as needed.
-    </description>
-    <url>http://whirr.apache.org/</url>
-  </metadata>
-  <deps>
-    <bigtop-utils/>
-  </deps>
-</whirr>
 <flume>
-  <metadata>
-    <summary>reliable, scalable, and manageable distributed data collection application</summary>
-    <description>Flume is a reliable, scalable, and manageable distributed data collection
- application for collecting data such as logs and delivering it to data stores
- such as Hadoop's HDFS.  It can efficiently collect, aggregate, and move large
- amounts of log data.  It has a simple, but flexible, architecture based on
- streaming data flows.  It is robust and fault tolerant with tunable reliability
- mechanisms and many failover and recovery mechanisms.  The system is centrally
- managed and allows for intelligent dynamic management.  It uses a simple
- extensible data model that allows for online analytic applications.</description>
-    <url>http://incubator.apache.org/projects/flume.html</url>
-  </metadata>
   <deps>
-    <bigtop-utils/>
     <adduser/>
-    <hadoop/>
   </deps>
-  <groups>
-    <flume>
-      <user>flume</user>
-    </flume>
-  </groups>
-  <users>
-    <flume>
-      <home>/var/run/flume</home>
-      <descr>Flume User</descr>
-      <shell>/bin/false</shell>
-    </flume>
-  </users>
-  <alternatives>
-    <flume-conf>
-      <status>auto</status>
-      <link>/etc/flume/conf</link>
-      <value>/etc/flume/conf.empty</value>
-      <alt>/etc/flume/conf.empty</alt>
-    </flume-conf>
-  </alternatives>
 </flume>
-<sqoop>
-  <metadata>
-    <summary>Tool for easy imports and exports of data sets between databases and HDFS</summary>
-    <description>Sqoop is a tool that provides the ability to import and export data sets between
- the Hadoop Distributed File System (HDFS) and relational databases.</description>
-    <url>http://incubator.apache.org/sqoop/</url>
-  </metadata>
-  <deps>
-    <bigtop-utils/>
-    <hadoop/>
-  </deps>
-  <alternatives>
-    <sqoop-conf>
-      <status>auto</status>
-      <link>/etc/sqoop/conf</link>
-      <value>/etc/sqoop/conf.dist</value>
-      <alt>/etc/sqoop/conf.dist</alt>
-    </sqoop-conf>
-  </alternatives>
-</sqoop>
 <sqoop-metastore>
-  <metadata>
-    <summary>Shared metadata repository for Sqoop.</summary>
-    <description>This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
-    <url>http://incubator.apache.org/sqoop/</url>
-  </metadata>
   <deps>
-    <sqoop>/self</sqoop>
     <adduser/>
   </deps>
-  <services>
-    <sqoop-metastore>
-       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-       <oninstall>start</oninstall>
-       <configured>true</configured>
-    </sqoop-metastore>
-  </services>
-  <groups>
-    <sqoop>
-      <user>sqoop</user>
-    </sqoop>
-  </groups>
-  <users>
-    <sqoop>
-      <home>/var/lib/sqoop</home>
-      <descr>Sqoop User</descr>
-      <shell>/bin/false</shell>
-    </sqoop>
-  </users>
 </sqoop-metastore>
-<oozie>
-  <metadata>
-    <summary>A workflow and coordinator system for Hadoop jobs.</summary>
-    <description>Oozie workflows are actions arranged in a control dependency DAG (Directed
- Acyclic Graph).
- Oozie coordinator functionality allows workflows to be started at regular
- frequencies and when data becomes available in HDFS.
- .
- An Oozie workflow may contain the following types of action nodes:
- map-reduce, map-reduce streaming, map-reduce pipes, pig, file-system,
- sub-workflows, java, hive, sqoop and ssh (deprecated).
- .
- Flow control operations within the workflow can be done using decision,
- fork and join nodes. Cycles in workflows are not supported.
- .
- Actions and decisions can be parameterized with job properties, actions
- output (i.e. Hadoop counters) and HDFS  file information (file exists,
- file size, etc). Formal parameters are expressed in the workflow definition
- as variables.
- .
- A Workflow application is an HDFS directory that contains the workflow
- definition (an XML file), all the necessary files to run all the actions:
- JAR files for Map/Reduce jobs, shells for streaming Map/Reduce jobs, native
- libraries, Pig scripts, and other resource files.
- .
- Running workflow jobs is done via command line tools, a WebServices API or
- a Java API.
- .
- Monitoring the system and workflow jobs can be done via a web console, the
- command line tools, the WebServices API and the Java API.
- .
- Oozie is a transactional system and it has built in automatic and manual
- retry capabilities.
- .
- In case of workflow job failure, the workflow job can be rerun skipping
- previously completed actions, the workflow application can be patched before
- being rerun.</description>
-    <url>http://incubator.apache.org/oozie/</url>
-  </metadata>
-  <deps>
-    <oozie-client>/self</oozie-client>
-    <hadoop/>
-    <zip/>
-    <unzip/>
-  </deps>
-  <services>
-    <oozie>
-       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-       <oninstall>stop</oninstall>
-       <configured>true</configured>
-    </oozie>
-  </services>
-  <groups>
-    <oozie>
-      <user>oozie</user>
-    </oozie>
-  </groups>
-  <users>
-    <oozie>
-      <home>/var/run/oozie</home>
-      <descr>Oozie User</descr>
-      <shell>/bin/false</shell>
-    </oozie>
-  </users>
-</oozie>
-<oozie-client>
-  <metadata>
-    <!-- summary>Client for Oozie Workflow Engine</summary -->
-    <description>Command line utility that allows
- remote access and operation of oozie. Using this utility, the
- user can deploy workflows and perform other administrative and
- monitoring tasks such as start, stop, kill, resume workflows
- and coordinator jobs.</description>
-    <url>http://incubator.apache.org/oozie/</url>
-  </metadata>
-  <deps>
-    <bigtop-utils/>
-  </deps>
-  <alternatives>
-    <oozie-conf>
-      <status>auto</status>
-      <link>/etc/oozie/conf</link>
-      <value>/etc/oozie/conf.dist</value>
-      <alt>/etc/oozie/conf.dist</alt>
-    </oozie-conf>
-  </alternatives>
-</oozie-client>
 <zookeeper>
-  <metadata>
-    <summary>A high-performance coordination service for distributed applications.</summary>
-    <description>ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.  All of these kinds of services are used in some form or another by distributed applications. Each time they are implemented there is a lot of work that goes into fixing the bugs and race conditions that are inevitable. Because of the difficulty of implementing these kinds of services, applications initially usually skimp on them, which makes them brittle in the presence of change and difficult to manage. Even when done correctly, different implementations of these services lead to management complexity when the applications are deployed.
-    </description>
-    <url>http://zookeeper.apache.org/</url>
-    <!-- group>misc</group -->
-  </metadata>
   <deps>
-    <bigtop-utils/>
+    <adduser/>
   </deps>
-  <groups>
-    <zookeeper>
-      <user>zookeeper</user>
-    </zookeeper>
-  </groups>
-  <users>
-    <zookeeper>
-      <home>/var/lib/zookeeper</home>
-      <descr>ZooKeeper User</descr>
-      <shell>/bin/false</shell>
-    </zookeeper>
-  </users>
-  <alternatives>
-    <zookeeper-conf>
-      <status>auto</status>
-      <link>/etc/zookeeper/conf</link>
-      <value>/etc/zookeeper/conf.dist</value>
-      <alt>/etc/zookeeper/conf.dist</alt>
-    </zookeeper-conf>
-  </alternatives>
 </zookeeper>
-<zookeeper-server>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <summary>This runs the zookeeper server on startup.</summary>
-    <url>http://zookeeper.apache.org/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <zookeeper>/self</zookeeper>
-  </deps>
-  <services>
-    <zookeeper-server>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>start</oninstall>
-      <configured>true</configured>
-    </zookeeper-server>
-  </services>
-</zookeeper-server>
-<pig>
-  <metadata>
-    <summary>A platform for analyzing large data sets using Hadoop</summary>
-    <description>Pig is a platform for analyzing large data sets that consists of a high-level language
- for expressing data analysis programs, coupled with infrastructure for evaluating these
- programs. The salient property of Pig programs is that their structure is amenable
- to substantial parallelization, which in turn enables them to handle very large data sets.
- .
- At the present time, Pig's infrastructure layer consists of a compiler that produces
- sequences of Map-Reduce programs, for which large-scale parallel implementations already
- exist (e.g., the Hadoop subproject). Pig's language layer currently consists of a textual
- language called Pig Latin, which has the following key properties:
- .
- * Ease of programming
-    It is trivial to achieve parallel execution of simple, "embarrassingly parallel" data
-    analysis tasks. Complex tasks comprised of multiple interrelated data transformations
-    are explicitly encoded as data flow sequences, making them easy to write, understand,
-    and maintain.
- * Optimization opportunities
-    The way in which tasks are encoded permits the system to optimize their execution
-    automatically, allowing the user to focus on semantics rather than efficiency.
- * Extensibility
-    Users can create their own functions to do special-purpose processing.</description>
-    <url>http://pig.apache.org/</url>
-  </metadata>
+<oozie>
   <deps>
-    <bigtop-utils/>
-    <hadoop/>
+    <adduser/>
+    <zip/>
+    <unzip/>
   </deps>
-  <!-- BUG: https://issues.cloudera.org/browse/DISTRO-223 -->
-  <alternatives>
-    <pig-conf>
-      <status>auto</status>
-      <link>/etc/pig/conf</link>
-      <value>/etc/pig/conf.dist</value>
-      <alt>/etc/pig/conf.dist</alt>
-    </pig-conf>
-  </alternatives>
-</pig>
+</oozie>
 <hive>
-  <metadata>
-    <summary>A data warehouse infrastructure built on top of Hadoop</summary>
-    <description>Hive is a data warehouse infrastructure built on top of Hadoop that
- provides tools to enable easy data summarization, ad hoc querying and
- analysis of large datasets stored in Hadoop files. It provides a
- mechanism to put structure on this data and it also provides a simple
- query language called Hive QL which is based on SQL and which enables
- users familiar with SQL to query this data. At the same time, this
- language also allows traditional map/reduce programmers to be able to
- plug in their custom mappers and reducers to do more sophisticated
- analysis which may not be supported by the built-in capabilities of
- the language.</description>
-    <url>http://hive.apache.org/</url>
-  </metadata>
   <deps>
     <adduser/>
-    <bigtop-utils/>
-    <hadoop/>
   </deps>
-  <alternatives>
-    <hive-conf>
-      <status>auto</status>
-      <value>/etc/hive/conf.dist</value>
-      <link>/etc/hive/conf</link>
-      <alt>/etc/hive/conf.dist</alt>
-    </hive-conf>
-  </alternatives>
 </hive>
 <hbase>
-  <metadata>
-    <summary>HBase is the Hadoop database</summary>
-    <description>Use it when you need random, realtime read/write access to your Big Data.
- This project's goal is the hosting of very large tables -- billions of rows
- X millions of columns -- atop clusters of commodity hardware.</description>
-    <url>http://hbase.apache.org/</url>
-    <!-- group>misc</group -->
-  </metadata>
   <deps>
     <adduser/>
-    <bigtop-utils/>
-    <zookeeper>>=3.3.1</zookeeper>
-    <hadoop/>
-  </deps>
-  <alternatives>
-    <hbase-conf>
-      <status>auto</status>
-      <value>/etc/hbase/conf.dist</value>
-      <link>/etc/hbase/conf</link>
-      <alt>/etc/hbase/conf.dist</alt>
-    </hbase-conf>
-  </alternatives>
-  <groups>
-    <hbase>
-      <user>hbase</user>
-    </hbase>
-  </groups>
-  <users>
-    <hbase>
-      <home>/var/lib/hbase</home> <!-- BUG https://issues.cloudera.org/browse/DISTRO-231 -->
-      <descr>HBase User</descr>
-      <shell>/bin/bash</shell>
-    </hbase>
-  </users>
-</hbase>
-<hbase-doc>
-  <metadata>
-    <summary>Documentation for HBase</summary>
-    <description>This package contains the HBase manual and JavaDoc.</description>
-    <url>http://hbase.apache.org/</url>
-    <!-- group>misc</group -->
-  </metadata>
-</hbase-doc>
-<hbase-master>
-  <metadata>
-    <summary>HMaster is the "master server" for a HBase</summary>
-    <description>There is only one HMaster for a single HBase deployment.</description>
-    <url>http://hbase.apache.org/</url>
-  </metadata>
-  <deps>
-    <hbase>/self</hbase>
   </deps>
-  <services>
-    <hbase-master>
-       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-       <oninstall>start</oninstall>
-       <configured>true</configured>
-    </hbase-master>
-  </services>
-</hbase-master>
-<hbase-regionserver>
-  <metadata>
-    <summary>HRegionServer makes a set of HRegions available to clients</summary>
-    <description>It checks in with the HMaster. There are many HRegionServers in a single
- HBase deployment.</description>
-    <url>http://hbase.apache.org/</url>
-  </metadata>
-  <deps>
-    <hbase>/self</hbase>
-  </deps>
-  <services>
-    <hbase-regionserver>
-       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-       <oninstall>stop</oninstall>
-       <configured>false</configured>
-    </hbase-regionserver>
-  </services>
-</hbase-regionserver>
-<hbase-thrift>
-  <metadata>
-    <summary>Provides an HBase Thrift service</summary>
-    <description>This package provides a Thrift service interface to the HBase distributed
- database.</description>
-    <url>http://hbase.apache.org/</url>
-  </metadata>
-  <deps>
-    <hbase>/self</hbase>
-  </deps>
-  <services>
-    <hbase-thrift>
-       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-       <oninstall>start</oninstall>
-       <configured>false</configured>
-    </hbase-thrift>
-  </services>
-</hbase-thrift>
+</hbase>
 <hadoop>
-  <metadata>
-    <!-- summary>Hadoop is a software platform for processing vast amounts of data</summary -->
-    <description>A software platform for processing vast amounts of data
- Hadoop is a software platform that lets one easily write and
- run applications that process vast amounts of data.
- .
- Here's what makes Hadoop especially useful:
-  * Scalable: Hadoop can reliably store and process petabytes.
-  * Economical: It distributes the data and processing across clusters
-                of commonly available computers. These clusters can number
-                into the thousands of nodes.
-  * Efficient: By distributing the data, Hadoop can process it in parallel
-               on the nodes where the data is located. This makes it
-               extremely rapid.
-  * Reliable: Hadoop automatically maintains multiple copies of data and
-              automatically redeploys computing tasks based on failures.
- .
- Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
- MapReduce divides applications into many small blocks of work. HDFS creates
- multiple replicas of data blocks for reliability, placing them on compute
- nodes around the cluster. MapReduce can then process the data where it is
- located.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- group>misc</group -->
-  </metadata>
   <deps>
+    <libc6/>
     <adduser/>
-    <bigtop-utils/>
   </deps>
-  <groups>
-    <hadoop>
-      <user>hdfs</user>
-      <user>mapred</user>
-    </hadoop>
-    <hdfs>
-      <user>hdfs</user>
-    </hdfs>
-    <mapred>
-      <user>mapred</user>
-    </mapred>
-  </groups>
-  <users>
-    <hdfs>
-      <home>/usr/lib/hadoop</home>
-      <descr>Hadoop HDFS</descr>
-      <shell>/bin/bash</shell>
-    </hdfs>
-    <mapred>
-      <home>/usr/lib/hadoop</home>
-      <descr>Hadoop MapReduce</descr>
-      <shell>/bin/bash</shell>
-    </mapred>
-  </users>
-  <alternatives>
-    <hadoop-conf>
-      <status>auto</status>
-      <link>/etc/hadoop/conf</link>
-      <value>/etc/hadoop/conf.empty</value>
-      <alt>/etc/hadoop/conf.empty</alt>
-    </hadoop-conf>
-  </alternatives>
 </hadoop>
-<hadoop-pipes>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Pipes Library</summary -->
-    <description>Interface to author Hadoop MapReduce jobs in C++
- Contains Hadoop Pipes, a library which allows Hadoop MapReduce jobs to be
- written in C++.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <hadoop>/self</hadoop>
-  </deps>
-</hadoop-pipes>
-<hadoop-native>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Native libraries for Hadoop Compression</summary -->
-    <description>Native libraries for Hadoop (e.g., compression)
- This optional package contains native libraries that increase the performance
- of Hadoop's compression.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <libc6>>=2.7</libc6>
-    <libssl0.9.8>>=0.9.8k-1</libssl0.9.8>
-    <hadoop>/self</hadoop>
-    <liblzo2-2/>
-    <libzip1/>
-  </deps>
-</hadoop-native>
-<hadoop-namenode>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>The Hadoop namenode manages the block locations of HDFS files</summary -->
-    <description>Name Node for Hadoop
- The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
- namenode, which manages the block locations of files on the filesystem.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <hadoop>/self</hadoop>
-  </deps>
-  <services>
-    <hadoop-namenode>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>stop</oninstall>
-      <configured>false</configured>
-    </hadoop-namenode>
-  </services>
-</hadoop-namenode>
-<hadoop-secondarynamenode>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Secondary namenode</summary -->
-    <description>Secondary Name Node for Hadoop
- The Secondary Name Node is responsible for checkpointing file system images.
- It is _not_ a failover pair for the namenode, and may safely be run on the
- same machine.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <hadoop>/self</hadoop>
-  </deps>
-  <services>
-    <hadoop-secondarynamenode>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>stop</oninstall>
-      <configured>false</configured>
-    </hadoop-secondarynamenode>
-  </services>
-</hadoop-secondarynamenode>
-<hadoop-datanode>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Data Node</summary -->
-    <description>Data Node for Hadoop
- The Data Nodes in the Hadoop Cluster are responsible for serving up
- blocks of data over the network to Hadoop Distributed Filesystem
- (HDFS) clients.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
+<hadoop-hdfs>
   <deps>
-    <hadoop>/self</hadoop>
-  </deps>
-  <services>
-    <hadoop-datanode>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>stop</oninstall>
-      <configured>false</configured>
-    </hadoop-datanode>
-  </services>
-</hadoop-datanode>
-<hadoop-jobtracker>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Job Tracker</summary -->
-    <description>Job Tracker for Hadoop
- The jobtracker is a central service which is responsible for managing
- the tasktracker services running on all nodes in a Hadoop Cluster.
- The jobtracker allocates work to the tasktracker nearest to the data
- with an available work slot.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-  <deps>
-    <hadoop>/self</hadoop>
+    <libc6/>
+    <adduser/>
   </deps>
-  <services>
-    <hadoop-jobtracker>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>stop</oninstall>
-      <configured>false</configured>
-    </hadoop-jobtracker>
-  </services>
-</hadoop-jobtracker>
-<hadoop-tasktracker>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Task Tracker</summary -->
-    <description>Task Tracker for Hadoop
- The Task Tracker is the Hadoop service that accepts MapReduce tasks and
- computes results. Each node in a Hadoop cluster that should be doing
- computation should run a Task Tracker.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
+</hadoop-hdfs>
+<hadoop-yarn>
   <deps>
-    <hadoop>/self</hadoop>
+    <libc6/>
+    <adduser/>
   </deps>
-  <services>
-    <hadoop-tasktracker>
-      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
-      <oninstall>stop</oninstall>
-      <configured>false</configured>
-    </hadoop-tasktracker>
-  </services>
-</hadoop-tasktracker>
-<hadoop-conf-pseudo>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop installation in pseudo-distributed mode</summary -->
-    <description>Pseudo-distributed Hadoop configuration
- Contains configuration files for a "pseudo-distributed" Hadoop deployment.
- In this mode, each of the hadoop components runs as a separate Java process,
- but all on the same machine.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
+</hadoop-yarn>
+<hadoop-mapreduce>
   <deps>
-    <hadoop>/self</hadoop>
-    <hadoop-namenode>/self</hadoop-namenode>
-    <hadoop-secondarynamenode>/self</hadoop-secondarynamenode>
-    <hadoop-datanode>/self</hadoop-datanode>
-    <hadoop-jobtracker>/self</hadoop-jobtracker>
-    <hadoop-tasktracker>/self</hadoop-tasktracker>
+    <adduser/>
   </deps>
-</hadoop-conf-pseudo>
-<hadoop-doc>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Hadoop Documentation</summary -->
-    <description>Documentation for Hadoop
- This package contains the Java Documentation for Hadoop and its relevant
- APIs.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-</hadoop-doc>
-<hadoop-source>
-  <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Source code for Hadoop</summary -->
-    <description>Source code for Hadoop
- This package contains the source code for Hadoop and its contrib modules.</description>
-    <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
-  </metadata>
-</hadoop-source>
+</hadoop-mapreduce>
 <libhdfs0>
   <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Mountable HDFS</summary -->
+    <summary>Mountable HDFS</summary>
     <description>JNI Bindings to access Hadoop HDFS from C
  See http://wiki.apache.org/hadoop/LibHDFS</description>
     <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
   </metadata>
   <deps>
     <hadoop>/self</hadoop>
     <libc6>>=2.4</libc6>
-    <libssl0.9.8>>=0.9.8k-1</libssl0.9.8>
   </deps>
 </libhdfs0>
 <libhdfs0-dev>
   <metadata>
-    <!-- license>APL2</license -->
-    <!-- arch>universal</arch -->
-    <!-- summary>Mountable HDFS</summary -->
-    <description>Development support for libhdfs0
- Includes examples and header files for accessing HDFS from C</description>
+    <summary>Mountable HDFS</summary>
+    <description>JNI Bindings to access Hadoop HDFS from C
+ See http://wiki.apache.org/hadoop/LibHDFS</description>
     <url>http://hadoop.apache.org/core/</url>
-    <!-- vendor>(none)</vendor -->
-    <!-- group>Development/Libraries</group -->
-    <!-- depends><dep>adduser</dep><dep>sun-java6-jre</dep><dep>sun-java6-bin</dep></depends -->
-    <!-- breaks></breaks -->
-    <!-- replaces></replaces -->
-    <!-- provides>zookeeper</provides -->
   </metadata>
   <deps>
     <hadoop>/self</hadoop>
-    <libhdfs0>=0.20.2+923.41-1~lucid-cdh3</libhdfs0>
+    <libhdfs0>/self</libhdfs0>
   </deps>
 </libhdfs0-dev>
 </packages>

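Both copies of package_data.xml use the same schema: <packages> contains one element per package, each with optional <metadata>, <deps>, <alternatives>, <services>, <groups> and <users> children (dependencies such as <zookeeper>>=3.3.1</zookeeper> and <sqoop>/self</sqoop> carry their constraint as element text). As a small illustrative sketch, not part of the test suite, such a file can be summarized with the Python standard library:

import xml.etree.ElementTree as ET

def summarize(path):
    """Print each package's dependencies, alternatives links and services."""
    root = ET.parse(path).getroot()                 # the <packages> element
    for pkg in root:                                # one child element per package
        deps = [dep.tag + (dep.text or '') for dep in pkg.findall('./deps/*')]
        alts = [alt.findtext('link', '') for alt in pkg.findall('./alternatives/*')]
        svcs = [svc.tag for svc in pkg.findall('./services/*')]
        print(pkg.tag)
        print('  deps: ' + (', '.join(deps) or '(none)'))
        if alts:
            print('  alternatives: ' + ', '.join(alts))
        if svcs:
            print('  services: ' + ', '.join(svcs))

summarize('package_data.xml')

Note that pkg.tag is the package name itself (e.g. hadoop, oozie-client), since the format uses the element name rather than a name attribute.
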
Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/pig.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/pig.xml?rev=1326670&r1=1326669&r2=1326670&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/pig.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/apt/pig.xml Mon Apr 16 16:10:22 2012
@@ -876,13 +876,6 @@
     <file name='/usr/lib/pig/CHANGES.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/lib/pig/pig-0.9.2.jar' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/lib/pig/RELEASE_NOTES.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
-    <file name='/usr/lib/pig/license/jline-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license/brics-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license/javacc-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license/jsch-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license/hadoop-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
-    <file name='/usr/lib/pig/license/junit-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
     <file name='/usr/share/doc' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
     <file name='/usr/share/doc/pig' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
@@ -3849,6 +3842,13 @@
     <file name='/usr/share/doc/pig/locationmap.xml' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share/doc/pig/zebra_users.pdf.gz' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share/doc/pig/test.html' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license' owners='-1' perm='drwxr-xr-x' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/jline-LICENSE.txt.gz' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/junit-LICENSE.txt.gz' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/brics-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/javacc-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/jsch-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
+    <file name='/usr/share/doc/pig/license/hadoop-LICENSE.txt' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share/doc/pig/index.pdf.gz' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share/doc/pig/copyright' owners='-1' perm='-rw-r--r--' user='root' group='root' />
     <file name='/usr/share/doc/pig/zebra_pig.pdf.gz' owners='-1' perm='-rw-r--r--' user='root' group='root' />

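The pig.xml change above is a relocation rather than a deletion: the bundled third-party license texts move from /usr/lib/pig/license to /usr/share/doc/pig/license, with the jline and junit licenses additionally gzip-compressed. A hypothetical sketch (file names assumed, not from the Bigtop sources) for spotting such moves when two versions of a manifest are saved side by side:

import os
import xml.etree.ElementTree as ET

def file_paths(manifest):
    """Collect every <file> path declared in a manifest file."""
    root = ET.parse(manifest).getroot()
    return {f.get('name') for f in root.iter('file')}

def relocations(old_manifest, new_manifest):
    """Map each removed path to new paths that carry the same file name."""
    old, new = file_paths(old_manifest), file_paths(new_manifest)
    moved = {}
    for removed in sorted(old - new):
        base = os.path.basename(removed)
        # treat 'foo-LICENSE.txt' and 'foo-LICENSE.txt.gz' as the same document
        matches = [p for p in new - old
                   if os.path.basename(p) in (base, base + '.gz')]
        if matches:
            moved[removed] = matches
    return moved

# Assumed inputs: the pre- and post-commit versions of the Pig manifest.
print(relocations('pig.xml.orig', 'pig.xml'))
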
Modified: incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml
URL: http://svn.apache.org/viewvc/incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml?rev=1326670&r1=1326669&r2=1326670&view=diff
==============================================================================
--- incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml (original)
+++ incubator/bigtop/trunk/bigtop-tests/test-artifacts/package/src/main/resources/package_data.xml Mon Apr 16 16:10:22 2012
@@ -16,4 +16,851 @@
    limitations under the License.
 -->
 <packages>
+<bigtop-utils>
+  <metadata>
+    <summary>Collection of useful tools for Bigtop</summary>
+    <description>This includes a collection of useful tools and files for Bigtop</description>
+    <url>http://incubator.apache.org/bigtop/</url>
+  </metadata>
+</bigtop-utils>
+<mahout>
+  <metadata>
+    <summary>A set of Java libraries for scalable machine learning.</summary>
+    <description>Mahout's goal is to build scalable machine learning libraries.
+ With scalable we mean:
+ .
+ Scalable to reasonably large data sets. Our core algorithms for clustering,
+ classification and batch based collaborative filtering are implemented on top of
+ Apache Hadoop using the map/reduce paradigm. However we do not restrict
+ contributions to Hadoop based implementations: Contributions that run on a
+ single node or on a non-Hadoop cluster are welcome as well. The core libraries
+ are highly optimized to allow for good performance also for non-distributed
+ algorithms.
+ Scalable to support your business case. Mahout is distributed under a
+ commercially friendly Apache Software license.
+ Scalable community. The goal of Mahout is to build a vibrant, responsive,
+ diverse community to facilitate discussions not only on the project itself but
+ also on potential use cases. Come to the mailing lists to find out more.</description>
+    <url>http://mahout.apache.org</url>
+  </metadata>
+  <deps>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <alternatives>
+    <mahout-conf>
+      <status>auto</status>
+      <link>/etc/mahout/conf</link>
+      <value>/etc/mahout/conf.dist</value>
+      <alt>/etc/mahout/conf.dist</alt>
+    </mahout-conf>
+  </alternatives>
+</mahout>
+<whirr>
+  <metadata>
+    <summary>Scripts and libraries for running software services on cloud infrastructure.</summary>
+    <description>Whirr provides
+ .
+  * A cloud-neutral way to run services. You don't have to worry about the
+    idiosyncrasies of each provider.
+  * A common service API. The details of provisioning are particular to the
+    service.
+  * Smart defaults for services. You can get a properly configured system
+    running quickly, while still being able to override settings as needed.
+    </description>
+    <url>http://whirr.apache.org/</url>
+  </metadata>
+  <deps>
+    <bigtop-utils/>
+  </deps>
+</whirr>
+<flume>
+  <metadata>
+    <summary>reliable, scalable, and manageable distributed data collection application</summary>
+    <description>Flume is a reliable, scalable, and manageable distributed data collection
+ application for collecting data such as logs and delivering it to data stores
+ such as Hadoop's HDFS.  It can efficiently collect, aggregate, and move large
+ amounts of log data.  It has a simple, but flexible, architecture based on
+ streaming data flows.  It is robust and fault tolerant with tunable reliability
+ mechanisms and many failover and recovery mechanisms.  The system is centrally
+ managed and allows for intelligent dynamic management.  It uses a simple
+ extensible data model that allows for online analytic applications.</description>
+    <url>http://incubator.apache.org/projects/flume.html</url>
+  </metadata>
+  <deps>
+    <zookeeper/>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <flume>
+      <user>flume</user>
+    </flume>
+  </groups>
+  <users>
+    <flume>
+      <home>/var/run/flume</home>
+      <descr>Flume User</descr>
+      <shell>/bin/false</shell>
+    </flume>
+  </users>
+  <alternatives>
+    <flume-conf>
+      <status>auto</status>
+      <link>/etc/flume/conf</link>
+      <value>/etc/flume/conf.empty</value>
+      <alt>/etc/flume/conf.empty</alt>
+    </flume-conf>
+  </alternatives>
+</flume>
+<flume-master>
+  <metadata>
+    <summary>central administration point for the flume data collection system</summary>
+    <description>The Flume master daemon is the central administration and data path control
+ point for flume nodes.</description>
+    <url>http://incubator.apache.org/projects/flume.html</url>
+  </metadata>
+  <deps>
+    <flume>/self</flume>
+  </deps>
+  <services>
+    <flume-master>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </flume-master>
+  </services>
+</flume-master>
+<flume-node>
+  <metadata>
+    <summary>core element of Flume's data path that collects and delivers data</summary>
+    <description>The Flume node daemon is a core element of flume's data path and is responsible for generating, processing, and delivering data.</description>
+    <url>http://incubator.apache.org/projects/flume.html</url>
+  </metadata>
+  <deps>
+    <flume>/self</flume>
+  </deps>
+  <services>
+    <flume-node>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </flume-node>
+  </services>
+</flume-node>
+<sqoop>
+  <metadata>
+    <summary>Tool for easy imports and exports of data sets between databases and HDFS</summary>
+    <description>Sqoop is a tool that provides the ability to import and export data sets between
+ the Hadoop Distributed File System (HDFS) and relational databases.</description>
+    <url>http://incubator.apache.org/sqoop/</url>
+  </metadata>
+  <deps>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <alternatives>
+    <sqoop-conf>
+      <status>auto</status>
+      <link>/etc/sqoop/conf</link>
+      <value>/etc/sqoop/conf.dist</value>
+      <alt>/etc/sqoop/conf.dist</alt>
+    </sqoop-conf>
+  </alternatives>
+</sqoop>
+<sqoop-metastore>
+  <metadata>
+    <summary>Shared metadata repository for Sqoop.</summary>
+    <description>This optional package hosts a metadata server for Sqoop clients across a network to use.</description>
+    <url>http://incubator.apache.org/sqoop/</url>
+  </metadata>
+  <deps>
+    <sqoop>/self</sqoop>
+  </deps>
+  <services>
+    <sqoop-metastore>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </sqoop-metastore>
+  </services>
+  <groups>
+    <sqoop>
+      <user>sqoop</user>
+    </sqoop>
+  </groups>
+  <users>
+    <sqoop>
+      <home>/var/lib/sqoop</home>
+      <descr>Sqoop User</descr>
+      <shell>/bin/false</shell>
+    </sqoop>
+  </users>
+</sqoop-metastore>
+<oozie>
+  <metadata>
+    <summary>A workflow and coordinator system for Hadoop jobs.</summary>
+    <description>Oozie workflows are actions arranged in a control dependency DAG (Directed
+ Acyclic Graph).
+ Oozie coordinator functionality allows workflows to be started at regular
+ frequencies and when data becomes available in HDFS.
+ .
+ An Oozie workflow may contain the following types of action nodes:
+ map-reduce, map-reduce streaming, map-reduce pipes, pig, file-system,
+ sub-workflows, java, hive, sqoop and ssh (deprecated).
+ .
+ Flow control operations within the workflow can be done using decision,
+ fork and join nodes. Cycles in workflows are not supported.
+ .
+ Actions and decisions can be parameterized with job properties, actions
+ output (i.e. Hadoop counters) and HDFS  file information (file exists,
+ file size, etc). Formal parameters are expressed in the workflow definition
+ as variables.
+ .
+ A Workflow application is an HDFS directory that contains the workflow
+ definition (an XML file), all the necessary files to run all the actions:
+ JAR files for Map/Reduce jobs, shells for streaming Map/Reduce jobs, native
+ libraries, Pig scripts, and other resource files.
+ .
+ Running workflow jobs is done via command line tools, a WebServices API or
+ a Java API.
+ .
+ Monitoring the system and workflow jobs can be done via a web console, the
+ command line tools, the WebServices API and the Java API.
+ .
+ Oozie is a transactional system and it has built in automatic and manual
+ retry capabilities.
+ .
+ In case of workflow job failure, the workflow job can be rerun skipping
+ previously completed actions, the workflow application can be patched before
+ being rerun.</description>
+    <url>http://incubator.apache.org/oozie/</url>
+  </metadata>
+  <deps>
+    <oozie-client>/self</oozie-client>
+  </deps>
+  <services>
+    <oozie>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>stop</oninstall>
+       <configured>true</configured>
+    </oozie>
+  </services>
+  <groups>
+    <oozie>
+      <user>oozie</user>
+    </oozie>
+  </groups>
+  <users>
+    <oozie>
+      <home>/var/run/oozie</home>
+      <descr>Oozie User</descr>
+      <shell>/bin/false</shell>
+    </oozie>
+  </users>
+</oozie>
+<oozie-client>
+  <metadata>
+    <summary>Client for Oozie Workflow Engine</summary>
+    <description>Command line utility that allows
+ remote access and operation of oozie. Using this utility, the
+ user can deploy workflows and perform other administrative and
+ monitoring tasks such as start, stop, kill, resume workflows
+ and coordinator jobs.</description>
+    <url>http://incubator.apache.org/oozie/</url>
+  </metadata>
+  <alternatives>
+    <oozie-conf>
+      <status>auto</status>
+      <link>/etc/oozie/conf</link>
+      <value>/etc/oozie/conf.dist</value>
+      <alt>/etc/oozie/conf.dist</alt>
+    </oozie-conf>
+  </alternatives>
+  <deps>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+</oozie-client>
+<zookeeper>
+  <metadata>
+    <summary>A high-performance coordination service for distributed applications.</summary>
+    <description>ZooKeeper is a centralized service for maintaining configuration information, naming, providing distributed synchronization, and providing group services.  All of these kinds of services are used in some form or another by distributed applications. Each time they are implemented there is a lot of work that goes into fixing the bugs and race conditions that are inevitable. Because of the difficulty of implementing these kinds of services, applications initially usually skimp on them, which makes them brittle in the presence of change and difficult to manage. Even when done correctly, different implementations of these services lead to management complexity when the applications are deployed.
+    </description>
+    <url>http://zookeeper.apache.org/</url>
+  </metadata>
+  <deps>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <zookeeper>
+      <user>zookeeper</user>
+    </zookeeper>
+  </groups>
+  <users>
+    <zookeeper>
+      <home>/var/lib/zookeeper</home>
+      <descr>ZooKeeper User</descr>
+      <shell>/bin/false</shell>
+    </zookeeper>
+  </users>
+  <alternatives>
+    <zookeeper-conf>
+      <status>auto</status>
+      <link>/etc/zookeeper/conf</link>
+      <value>/etc/zookeeper/conf.dist</value>
+      <alt>/etc/zookeeper/conf.dist</alt>
+    </zookeeper-conf>
+  </alternatives>
+</zookeeper>
+<zookeeper-server>
+  <metadata>
+    <summary>This runs the zookeeper server on startup.</summary>
+    <description>This package starts the zookeeper server on startup</description>
+    <url>http://zookeeper.apache.org/</url>
+  </metadata>
+  <deps>
+    <zookeeper>/self</zookeeper>
+  </deps>
+  <services>
+    <zookeeper-server>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>start</oninstall>
+      <configured>true</configured>
+    </zookeeper-server>
+  </services>
+</zookeeper-server>
+<pig>
+  <metadata>
+    <summary>A platform for analyzing large data sets using Hadoop</summary>
+    <description>Pig is a platform for analyzing large data sets that consists of a high-level language
+ for expressing data analysis programs, coupled with infrastructure for evaluating these
+ programs. The salient property of Pig programs is that their structure is amenable
+ to substantial parallelization, which in turn enables them to handle very large data sets.
+ .
+ At the present time, Pig's infrastructure layer consists of a compiler that produces
+ sequences of Map-Reduce programs, for which large-scale parallel implementations already
+ exist (e.g., the Hadoop subproject). Pig's language layer currently consists of a textual
+ language called Pig Latin, which has the following key properties:
+ .
+ * Ease of programming
+    It is trivial to achieve parallel execution of simple, "embarrassingly parallel" data
+    analysis tasks. Complex tasks comprised of multiple interrelated data transformations
+    are explicitly encoded as data flow sequences, making them easy to write, understand,
+    and maintain.
+ * Optimization opportunities
+    The way in which tasks are encoded permits the system to optimize their execution
+    automatically, allowing the user to focus on semantics rather than efficiency.
+ * Extensibility
+    Users can create their own functions to do special-purpose processing.</description>
+    <url>http://pig.apache.org/</url>
+  </metadata>
+  <deps>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <alternatives>
+    <pig-conf>
+      <status>auto</status>
+      <link>/etc/pig/conf</link>
+      <value>/etc/pig/conf.dist</value>
+      <alt>/etc/pig/conf.dist</alt>
+    </pig-conf>
+  </alternatives>
+</pig>
+<hive>
+  <metadata>
+    <summary>A data warehouse infrastructure built on top of Hadoop</summary>
+    <description>Hive is a data warehouse infrastructure built on top of Hadoop that
+ provides tools to enable easy data summarization, ad hoc querying and
+ analysis of large datasets stored in Hadoop files. It provides a
+ mechanism to put structure on this data and it also provides a simple
+ query language called Hive QL which is based on SQL and which enables
+ users familiar with SQL to query this data. At the same time, this
+ language also allows traditional map/reduce programmers to be able to
+ plug in their custom mappers and reducers to do more sophisticated
+ analysis which may not be supported by the built-in capabilities of
+ the language.</description>
+    <url>http://hive.apache.org/</url>
+  </metadata>
+  <deps>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <alternatives>
+    <hive-conf>
+      <status>auto</status>
+      <value>/etc/hive/conf.dist</value>
+      <link>/etc/hive/conf</link>
+      <alt>/etc/hive/conf.dist</alt>
+    </hive-conf>
+  </alternatives>
+</hive>
+<hive-metastore>
+  <metadata>
+    <summary>Shared metadata repository for Hive.</summary>
+    <description>This optional package hosts a metadata server for Hive clients across a network to use.</description>
+    <url>http://hive.apache.org/</url>
+  </metadata>
+  <deps>
+    <hive>/self</hive>
+  </deps>
+  <services>
+    <hive-metastore>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </hive-metastore>
+  </services>
+  <groups>
+    <hive>
+      <user>hive</user>
+    </hive>
+  </groups>
+  <users>
+    <hive>
+      <home>/var/lib/hive</home>
+      <descr>Hive User</descr>
+      <shell>/bin/false</shell>
+    </hive>
+  </users>
+</hive-metastore>
+<hive-server>
+  <metadata>
+    <summary>Provides a Hive Thrift service.</summary>
+    <description>This optional package hosts a Thrift server for Hive clients across a network to use.</description>
+    <url>http://hive.hadoop.apache.org/</url>
+  </metadata>
+  <deps>
+    <hive>/self</hive>
+  </deps>
+  <services>
+    <hive-server>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </hive-server>
+  </services>
+  <groups>
+    <hive>
+      <user>hive</user>
+    </hive>
+  </groups>
+  <users>
+    <hive>
+      <home>/var/lib/hive</home>
+      <descr>Hive User</descr>
+      <shell>/bin/false</shell>
+    </hive>
+  </users>
+</hive-server>
+<hbase>
+  <metadata>
+    <summary>HBase is the Hadoop database</summary>
+    <description>Use it when you need random, realtime read/write access to your Big Data.
+ This project's goal is the hosting of very large tables -- billions of rows
+ X millions of columns -- atop clusters of commodity hardware.</description>
+    <url>http://hbase.apache.org/</url>
+  </metadata>
+  <deps>
+    <zookeeper/>
+    <hadoop/>
+    <bigtop-utils/>
+  </deps>
+  <alternatives>
+    <hbase-conf>
+      <status>auto</status>
+      <value>/etc/hbase/conf.dist</value>
+      <link>/etc/hbase/conf</link>
+      <alt>/etc/hbase/conf.dist</alt>
+    </hbase-conf>
+  </alternatives>
+  <groups>
+    <hbase>
+      <user>hbase</user>
+    </hbase>
+  </groups>
+  <users>
+    <hbase>
+      <home>/var/lib/hbase</home>
+      <descr>HBase User</descr>
+      <shell>/bin/bash</shell>
+    </hbase>
+  </users>
+</hbase>
+<hbase-doc>
+  <metadata>
+    <summary>Documentation for HBase</summary>
+    <description>This package contains the HBase manual and JavaDoc.</description>
+    <url>http://hbase.apache.org/</url>
+  </metadata>
+</hbase-doc>
+<hbase-master>
+  <metadata>
+    <summary>HMaster is the "master server" for a HBase</summary>
+    <description>There is only one HMaster for a single HBase deployment.</description>
+    <url>http://hbase.apache.org/</url>
+  </metadata>
+  <deps>
+    <hbase>/self</hbase>
+  </deps>
+  <services>
+    <hbase-master>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>true</configured>
+    </hbase-master>
+  </services>
+</hbase-master>
+<hbase-regionserver>
+  <metadata>
+    <summary>HRegionServer makes a set of HRegions available to clients</summary>
+    <description>It checks in with the HMaster. There are many HRegionServers in a single
+ HBase deployment.</description>
+    <url>http://hbase.apache.org/</url>
+  </metadata>
+  <deps>
+    <hbase>/self</hbase>
+  </deps>
+  <services>
+    <hbase-regionserver>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>stop</oninstall>
+       <configured>false</configured>
+    </hbase-regionserver>
+  </services>
+</hbase-regionserver>
+<hbase-thrift>
+  <metadata>
+    <summary>Provides an HBase Thrift service</summary>
+    <description>This package provides a Thrift service interface to the HBase distributed
+ database.</description>
+    <url>http://hbase.apache.org/</url>
+  </metadata>
+  <deps>
+    <hbase>/self</hbase>
+  </deps>
+  <services>
+    <hbase-thrift>
+       <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+       <oninstall>start</oninstall>
+       <configured>false</configured>
+    </hbase-thrift>
+  </services>
+</hbase-thrift>
+<hadoop>
+  <metadata>
+    <summary>Hadoop is a software platform for processing vast amounts of data</summary>
+    <description>A software platform for processing vast amounts of data
+ Hadoop is a software platform that lets one easily write and
+ run applications that process vast amounts of data.
+ .
+ Here's what makes Hadoop especially useful:
+  * Scalable: Hadoop can reliably store and process petabytes.
+  * Economical: It distributes the data and processing across clusters
+                of commonly available computers. These clusters can number
+                into the thousands of nodes.
+  * Efficient: By distributing the data, Hadoop can process it in parallel
+               on the nodes where the data is located. This makes it
+               extremely rapid.
+  * Reliable: Hadoop automatically maintains multiple copies of data and
+              automatically redeploys computing tasks based on failures.
+ .
+ Hadoop implements MapReduce, using the Hadoop Distributed File System (HDFS).
+ MapReduce divides applications into many small blocks of work. HDFS creates
+ multiple replicas of data blocks for reliability, placing them on compute
+ nodes around the cluster. MapReduce can then process the data where it is
+ located.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <hadoop/>
+  </groups>
+  <alternatives>
+    <hadoop-conf>
+      <status>auto</status>
+      <link>/etc/hadoop/conf</link>
+      <value>/etc/hadoop/conf.empty</value>
+      <alt>/etc/hadoop/conf.empty</alt>
+    </hadoop-conf>
+  </alternatives>
+</hadoop>
+<hadoop-hdfs>
+  <metadata>
+    <summary>The Hadoop Distributed File System</summary>
+    <description>Hadoop Distributed File System (HDFS)
+ HDFS is the primary distributed storage used by Hadoop applications. It
+ provides fault-tolerant storage of data blocks replicated across the nodes
+ of a Hadoop cluster.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop>/self</hadoop>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <hdfs>
+      <user>hdfs</user>
+    </hdfs>
+  </groups>
+  <users>
+    <hdfs>
+      <home>/var/lib/hdfs</home>
+      <descr>Hadoop HDFS</descr>
+      <shell>/bin/bash</shell>
+    </hdfs>
+  </users>
+</hadoop-hdfs>
+<hadoop-yarn>
+  <metadata>
+    <summary>The Hadoop NextGen MapReduce framework (YARN)</summary>
+    <description>Hadoop YARN (Yet Another Resource Negotiator)
+ YARN is the Hadoop resource-management and job-scheduling framework. It
+ splits cluster resource management (the ResourceManager) from per-node
+ container execution (the NodeManagers).</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop>/self</hadoop>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <yarn>
+      <user>yarn</user>
+    </yarn>
+  </groups>
+  <users>
+    <yarn>
+      <home>/var/lib/yarn</home>
+      <descr>Hadoop YARN</descr>
+      <shell>/bin/bash</shell>
+    </yarn>
+  </users>
+</hadoop-yarn>
+<hadoop-mapreduce>
+  <metadata>
+    <summary>The Hadoop MapReduce framework</summary>
+    <description>Hadoop MapReduce
+ MapReduce is the Hadoop framework for writing applications that process
+ large data sets in parallel; in this release it runs as an application on
+ top of YARN.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-yarn>/self</hadoop-yarn>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <mapreduce>
+      <user>mapreduce</user>
+    </mapreduce>
+  </groups>
+  <users>
+    <mapreduce>
+      <home>/var/lib/mapreduce</home>
+      <descr>Hadoop MapReduce</descr>
+      <shell>/bin/bash</shell>
+    </mapreduce>
+  </users>
+</hadoop-mapreduce>
+<hadoop-httpfs>
+  <metadata>
+    <summary>HTTP gateway (HttpFS) to HDFS</summary>
+    <description>HttpFS gateway for Hadoop HDFS
+ HttpFS is a server that provides a REST HTTP gateway to HDFS, supporting
+ all HDFS filesystem read and write operations.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-hdfs>/self</hadoop-hdfs>
+    <bigtop-utils/>
+  </deps>
+  <groups>
+    <httpfs>
+      <user>httpfs</user>
+    </httpfs>
+  </groups>
+  <users>
+    <httpfs>
+      <home>/var/run/hadoop-httpfs</home>
+      <descr>Hadoop HTTPFS</descr>
+      <shell>/bin/bash</shell>
+    </httpfs>
+  </users>
+  <services>
+    <hadoop-httpfs>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-httpfs>
+  </services>
+</hadoop-httpfs>
+<hadoop-hdfs-namenode>
+  <metadata>
+    <summary>The Hadoop namenode manages the block locations of HDFS files</summary>
+    <description>Name Node for Hadoop
+ The Hadoop Distributed Filesystem (HDFS) requires one unique server, the
+ namenode, which manages the block locations of files on the filesystem.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-hdfs>/self</hadoop-hdfs>
+  </deps>
+  <services>
+    <hadoop-hdfs-namenode>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-hdfs-namenode>
+  </services>
+</hadoop-hdfs-namenode>
+<hadoop-hdfs-secondarynamenode>
+  <metadata>
+    <summary>Hadoop Secondary namenode</summary>
+    <description>Secondary Name Node for Hadoop
+ The Secondary Name Node is responsible for checkpointing file system images.
+ It is _not_ a failover pair for the namenode, and may safely be run on the
+ same machine.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-hdfs>/self</hadoop-hdfs>
+  </deps>
+  <services>
+    <hadoop-hdfs-secondarynamenode>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-hdfs-secondarynamenode>
+  </services>
+</hadoop-hdfs-secondarynamenode>
+<hadoop-hdfs-datanode>
+  <metadata>
+    <summary>Hadoop Data Node</summary>
+    <description>Data Node for Hadoop
+ The Data Nodes in the Hadoop Cluster are responsible for serving up
+ blocks of data over the network to Hadoop Distributed Filesystem
+ (HDFS) clients.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-hdfs>/self</hadoop-hdfs>
+  </deps>
+  <services>
+    <hadoop-hdfs-datanode>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-hdfs-datanode>
+  </services>
+</hadoop-hdfs-datanode>
+<hadoop-yarn-resourcemanager>
+  <metadata>
+    <summary>Hadoop YARN ResourceManager</summary>
+    <description>ResourceManager for Hadoop YARN
+ The ResourceManager is the central service that arbitrates cluster
+ resources among applications and schedules work onto the NodeManagers
+ running on the cluster nodes.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-yarn>/self</hadoop-yarn>
+  </deps>
+  <services>
+    <hadoop-yarn-resourcemanager>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-yarn-resourcemanager>
+  </services>
+</hadoop-yarn-resourcemanager>
+<hadoop-yarn-nodemanager>
+  <metadata>
+    <summary>Hadoop YARN NodeManager</summary>
+    <description>NodeManager for Hadoop YARN
+ The NodeManager is the per-node YARN agent that launches and monitors
+ application containers and reports resource usage to the ResourceManager.
+ Each node in the cluster that should run computation runs a NodeManager.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-yarn>/self</hadoop-yarn>
+  </deps>
+  <services>
+    <hadoop-yarn-nodemanager>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-yarn-nodemanager>
+  </services>
+</hadoop-yarn-nodemanager>
+<hadoop-yarn-proxyserver>
+  <metadata>
+    <summary>Hadoop YARN Web Proxy</summary>
+    <description>Web proxy server for Hadoop YARN
+ The web proxy runs in front of the ApplicationMaster web interfaces to
+ reduce the exposure of the cluster to web-based attacks.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-yarn>/self</hadoop-yarn>
+  </deps>
+  <services>
+    <hadoop-yarn-proxyserver>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-yarn-proxyserver>
+  </services>
+</hadoop-yarn-proxyserver>
+<hadoop-mapreduce-historyserver>
+  <metadata>
+    <summary>Hadoop MapReduce JobHistory Server</summary>
+    <description>MapReduce JobHistory Server for Hadoop
+ The JobHistory Server stores and serves information about completed
+ MapReduce applications on the cluster.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop-mapreduce>/self</hadoop-mapreduce>
+  </deps>
+  <services>
+    <hadoop-mapreduce-historyserver>
+      <runlevel>2</runlevel><runlevel>3</runlevel><runlevel>4</runlevel><runlevel>5</runlevel>
+      <oninstall>stop</oninstall>
+      <configured>false</configured>
+    </hadoop-mapreduce-historyserver>
+  </services>
+</hadoop-mapreduce-historyserver>
+<hadoop-conf-pseudo>
+  <metadata>
+    <summary>Hadoop installation in pseudo-distributed mode</summary>
+    <description>Pseudo-distributed Hadoop configuration
+ Contains configuration files for a "pseudo-distributed" Hadoop deployment.
+ In this mode, each of the hadoop components runs as a separate Java process,
+ but all on the same machine.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+  <deps>
+    <hadoop>/self</hadoop>
+    <hadoop-hdfs-namenode>/self</hadoop-hdfs-namenode>
+    <hadoop-hdfs-datanode>/self</hadoop-hdfs-datanode>
+    <hadoop-hdfs-secondarynamenode>/self</hadoop-hdfs-secondarynamenode>
+    <hadoop-yarn-resourcemanager>/self</hadoop-yarn-resourcemanager>
+    <hadoop-yarn-nodemanager>/self</hadoop-yarn-nodemanager>
+    <hadoop-mapreduce-historyserver>/self</hadoop-mapreduce-historyserver>
+  </deps>
+</hadoop-conf-pseudo>
+<hadoop-doc>
+  <metadata>
+    <summary>Hadoop Documentation</summary>
+    <description>Documentation for Hadoop
+ This package contains the Java Documentation for Hadoop and its relevant
+ APIs.</description>
+    <url>http://hadoop.apache.org/core/</url>
+  </metadata>
+</hadoop-doc>
 </packages>


