chukwa-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ey...@apache.org
Subject chukwa git commit: CHUKWA-804. Update user documents to match current code base. (Eric Yang)
Date Mon, 04 Apr 2016 05:01:51 GMT
Repository: chukwa
Updated Branches:
  refs/heads/master f77266b55 -> 84ade8281


CHUKWA-804.  Update user documents to match current code base. (Eric Yang)


Project: http://git-wip-us.apache.org/repos/asf/chukwa/repo
Commit: http://git-wip-us.apache.org/repos/asf/chukwa/commit/84ade828
Tree: http://git-wip-us.apache.org/repos/asf/chukwa/tree/84ade828
Diff: http://git-wip-us.apache.org/repos/asf/chukwa/diff/84ade828

Branch: refs/heads/master
Commit: 84ade82813f0986fed597c3a5e3ccfe1540fc5de
Parents: f77266b
Author: Eric Yang <eyang@apache.org>
Authored: Sun Apr 3 22:01:41 2016 -0700
Committer: Eric Yang <eyang@apache.org>
Committed: Sun Apr 3 22:01:41 2016 -0700

----------------------------------------------------------------------
 CHANGES.txt                                     |   2 +
 pom.xml                                         | 168 ++++++--
 .../chukwa/hicc/rest/MetricsController.java     |  12 +
 src/main/resources/hicc-rest.xml                |  54 +++
 src/site/apt/Quick_Start_Guide.apt.vm           |  29 +-
 src/site/apt/datamodel.apt                      |   8 +-
 src/site/apt/pipeline.apt                       |  20 +
 src/site/apt/programming.apt                    | 395 +------------------
 src/site/apt/user.apt.vm                        | 260 ++++++++++++
 src/site/site.xml                               |   1 +
 10 files changed, 497 insertions(+), 452 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index c6d1f08..dbb8d0e 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -6,6 +6,8 @@ Trunk (unreleased changes)
 
   IMPROVEMENTS
 
+    CHUKWA-804.  Update user documents to match current code base. (Eric Yang)
+
     CHUKWA-802. Updated Javadoc for Java 8 support.  (Eric Yang)
 
   BUGS

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 029a3d3..c00fe32 100644
--- a/pom.xml
+++ b/pom.xml
@@ -50,6 +50,7 @@
         <hbaseVersion>1.2.0</hbaseVersion>
         <hadoopVersion>2.7.2</hadoopVersion>
         <solrVersion>5.5.0</solrVersion>
+        <javaVersion>1.8</javaVersion>
         <!-- <JAVA_HOME>${java.home}</JAVA_HOME> -->
         <gora.version>0.6</gora.version>
     </properties>
@@ -514,9 +515,9 @@
                       <goals>
                         <goal>testCompile</goal>
                       </goals>
-                      <compilerVersion>1.8</compilerVersion>
-                      <source>1.8</source>
-                      <target>1.8</target>
+                      <compilerVersion>${javaVersion}</compilerVersion>
+                      <source>${javaVersion}</source>
+                      <target>${javaVersion}</target>
                       <testExcludes>
                         <exclude>**/ChukwaJobTrackerInstrumentation.java</exclude>
                         <exclude>**/TestDemuxManager.java</exclude>
@@ -1331,6 +1332,36 @@
                       <additionalparam>-output ${project.build.outputDirectory}/agent-rest.xml</additionalparam>
                     </configuration>
                   </execution>
+                  <execution>
+                    <id>hicc-public-api</id>
+                    <goals>
+                      <goal>javadoc</goal>
+                    </goals>
+                    <phase>compile</phase>
+                    <configuration>
+                      <encoding>UTF-8</encoding>
+                      <verbose>false</verbose>
+                      <show>public</show>
+                      <failOnError>false</failOnError>
+                      <subpackages>org.apache.hadoop.chukwa.hicc.rest</subpackages>
+                      <doclet>com.sun.jersey.wadl.resourcedoc.ResourceDocletJSON</doclet>
+                      <docletPath>${path.separator}${project.build.outputDirectory}</docletPath>
+                      <docletArtifacts>
+                        <docletArtifact>
+                          <groupId>com.atlassian.plugins.rest</groupId>
+                          <artifactId>atlassian-rest-doclet</artifactId>
+                          <version>2.8.0-m3</version>
+                        </docletArtifact>
+                        <docletArtifact>
+                          <groupId>xerces</groupId>
+                          <artifactId>xercesImpl</artifactId>
+                          <version>2.11.0</version>
+                        </docletArtifact>
+                      </docletArtifacts>
+                      <useStandardDocletOptions>false</useStandardDocletOptions>
+                      <additionalparam>-output ${project.build.outputDirectory}/hicc-rest.xml</additionalparam>
+                    </configuration>
+                  </execution>
                 </executions>
               </plugin>
               <plugin>
@@ -1339,50 +1370,94 @@
                 <version>1.8</version>
                 <executions>
                   <execution>
-                    <id>generate</id>
+                    <id>agent-rest</id>
+                    <goals>
+                      <goal>generate</goal>
+                    </goals>
+                    <phase>compile</phase>
+                    <configuration>
+                      <wadlFile>${project.build.outputDirectory}/application.wadl</wadlFile>
+                      <formatWadlFile>true</formatWadlFile>
+                      <baseUri>http://[host]:[port]/rest/v2</baseUri>
+                      <packagesResourceConfig>
+                        <param>org.apache.hadoop.chukwa.datacollection.agent.rest</param>
+                      </packagesResourceConfig>
+                      <wadlGenerators>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.WadlGeneratorApplicationDoc</className>
+                          <properties>
+                            <property>
+                              <name>applicationDocsFile</name>
+                              <value>${basedir}/src/main/resources/agent-rest.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.WadlGeneratorGrammarsSupport</className>
+                          <properties>
+                            <property>
+                              <name>grammarsFile</name>
+                              <value>${basedir}/src/main/resources/application-grammars.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.resourcedoc.WadlGeneratorResourceDocSupport</className>
+                          <properties>
+                            <property>
+                              <name>resourceDocFile</name>
+                              <value>${project.build.outputDirectory}/agent-rest.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                      </wadlGenerators>
+                    </configuration>
+                  </execution>
+                  <execution>
+                    <id>hicc-rest</id>
                     <goals>
                       <goal>generate</goal>
                     </goals>
                     <phase>compile</phase>
+                    <configuration>
+                      <wadlFile>${project.build.outputDirectory}/hicc-application.wadl</wadlFile>
+                      <formatWadlFile>true</formatWadlFile>
+                      <baseUri>http://[host]:[port]/hicc/v1</baseUri>
+                      <packagesResourceConfig>
+                        <param>org.apache.hadoop.chukwa.hicc.rest</param>
+                      </packagesResourceConfig>
+                      <wadlGenerators>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.WadlGeneratorApplicationDoc</className>
+                          <properties>
+                            <property>
+                              <name>applicationDocsFile</name>
+                              <value>${basedir}/src/main/resources/hicc-rest.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.WadlGeneratorGrammarsSupport</className>
+                          <properties>
+                            <property>
+                              <name>grammarsFile</name>
+                              <value>${basedir}/src/main/resources/application-grammars.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                        <wadlGeneratorDescription>
+                          <className>com.sun.jersey.server.wadl.generators.resourcedoc.WadlGeneratorResourceDocSupport</className>
+                          <properties>
+                            <property>
+                              <name>resourceDocFile</name>
+                              <value>${project.build.outputDirectory}/hicc-rest.xml</value>
+                            </property>
+                          </properties>
+                        </wadlGeneratorDescription>
+                      </wadlGenerators>
+                    </configuration>
                   </execution>
                 </executions>
-                <configuration>
-                  <wadlFile>${project.build.outputDirectory}/application.wadl</wadlFile>
-                  <formatWadlFile>true</formatWadlFile>
-                  <baseUri>http://[host]:[port]/rest/v2</baseUri>
-                  <packagesResourceConfig>
-                    <param>org.apache.hadoop.chukwa.datacollection.agent.rest</param>
-                  </packagesResourceConfig>
-                  <wadlGenerators>
-                    <wadlGeneratorDescription>
-                      <className>com.sun.jersey.server.wadl.generators.WadlGeneratorApplicationDoc</className>
-                      <properties>
-                        <property>
-                          <name>applicationDocsFile</name>
-                          <value>${basedir}/src/main/resources/agent-rest.xml</value>
-                        </property>
-                      </properties>
-                    </wadlGeneratorDescription>
-                    <wadlGeneratorDescription>
-                      <className>com.sun.jersey.server.wadl.generators.WadlGeneratorGrammarsSupport</className>
-                      <properties>
-                        <property>
-                          <name>grammarsFile</name>
-                          <value>${basedir}/src/main/resources/application-grammars.xml</value>
-                        </property>
-                      </properties>
-                    </wadlGeneratorDescription>
-                    <wadlGeneratorDescription>
-                      <className>com.sun.jersey.server.wadl.generators.resourcedoc.WadlGeneratorResourceDocSupport</className>
-                      <properties>
-                        <property>
-                          <name>resourceDocFile</name>
-                          <value>${project.build.outputDirectory}/agent-rest.xml</value>
-                        </property>
-                      </properties>
-                    </wadlGeneratorDescription>
-                  </wadlGenerators>
-                </configuration>
                 <dependencies>
                   <dependency>
                     <groupId>javax.servlet</groupId>
@@ -1412,6 +1487,17 @@
                       <commandlineArgs>-o ${project.build.directory}/site/apidocs/agent-rest.html src/main/webapps/wadl.xsl target/classes/application.wadl</commandlineArgs>
                     </configuration>
                   </execution>
+                  <execution>
+                    <id>exec-xsltproc: target/hicc-application.html</id>
+                    <goals>
+                      <goal>exec</goal>
+                    </goals>
+                    <phase>package</phase>
+                    <configuration>
+                      <executable>xsltproc</executable>
+                      <commandlineArgs>-o ${project.build.directory}/site/apidocs/hicc-rest.html src/main/webapps/wadl.xsl target/hicc/WEB-INF/classes/hicc-application.wadl</commandlineArgs>
+                    </configuration>
+                  </execution>
                 </executions>
               </plugin>
             </plugins>

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/main/java/org/apache/hadoop/chukwa/hicc/rest/MetricsController.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/hadoop/chukwa/hicc/rest/MetricsController.java b/src/main/java/org/apache/hadoop/chukwa/hicc/rest/MetricsController.java
index 5130523..c3f3746 100644
--- a/src/main/java/org/apache/hadoop/chukwa/hicc/rest/MetricsController.java
+++ b/src/main/java/org/apache/hadoop/chukwa/hicc/rest/MetricsController.java
@@ -35,6 +35,7 @@ import javax.ws.rs.WebApplicationException;
 import javax.ws.rs.core.Context;
 import javax.ws.rs.core.Response;
 
+import org.apache.hadoop.chukwa.datacollection.agent.rest.Examples;
 import org.apache.hadoop.chukwa.datastore.ChukwaHBaseStore;
 import org.apache.hadoop.chukwa.hicc.TimeHandler;
 import org.apache.hadoop.chukwa.hicc.bean.Series;
@@ -45,6 +46,17 @@ import com.google.gson.reflect.TypeToken;
 @Path("/metrics")
 public class MetricsController {
 
+  /**
+   * Query metrics stored in HBase table
+   * 
+   * @param request is HTTP request object
+   * @param metric is metric name
+   * @param source is data source
+   * @param start is start time
+   * @param end is end time
+   * @return Metrics JSON
+   * 
+   */
   @GET
   @Path("series/{metric}/{source}")
   @Produces("application/json")

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/main/resources/hicc-rest.xml
----------------------------------------------------------------------
diff --git a/src/main/resources/hicc-rest.xml b/src/main/resources/hicc-rest.xml
new file mode 100644
index 0000000..31856e3
--- /dev/null
+++ b/src/main/resources/hicc-rest.xml
@@ -0,0 +1,54 @@
+<!--
+
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements.  See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership.  The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+
+-->
+
+<applicationDocs targetNamespace="http://research.sun.com/wadl/2006/10">
+
+<doc xml:lang="en" title="Chukwa HICC REST API">
+
+<p>
+   Chukwa HICC  provides REST interfaces for dashboard rendering.
+   The primary resources are:
+
+   <ul>
+     <li>Dashboard resource - the configuration of dashboard.</li>
+     <li>Widget resource - the configuration of widgets.</li>
+   </ul>
+   
+</p><br/>
+<p>
+   The resources and the entities that are passed to them are defined
+   using JAXB and are represented in either XML or JSON formats
+   depending on the ContentType and Accept HTTP headers. The definition
+   of the types is given in the 
+   <a href="org/apache/hadoop/chukwa/hicc/rest/package-summary.html">JavaDoc</a>.
+</p><br/>
+<p>
+   Typical usage is:
+
+   <ul>
+     <li>Create a chart</li>
+     <li>Create a widget, register chart with widget</li>
+     <li>Create a dashboard, register widget with dashboard</li>
+   </ul>
+
+</p>
+</doc>
+
+</applicationDocs>

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/apt/Quick_Start_Guide.apt.vm
----------------------------------------------------------------------
diff --git a/src/site/apt/Quick_Start_Guide.apt.vm b/src/site/apt/Quick_Start_Guide.apt.vm
index 6248eb6..210ddde 100644
--- a/src/site/apt/Quick_Start_Guide.apt.vm
+++ b/src/site/apt/Quick_Start_Guide.apt.vm
@@ -100,34 +100,11 @@ sbin/chukwa-daemon.sh start agent
 
 * Setup Solr to index Service log files
 
-  [[1]] Start Solr with Chukwa Solr configuration:
+  [[1]] Start Solr ${solrVersion} with Chukwa Solr configuration:
 
 ---
-java -Dbootstrap_confdir=$CHUKWA_HOME/etc/solr/logs/conf -Dcollection.configName=myconf -Djetty.port=7574 -DzkHost=localhost:2181 -jar start
----
-
-* Setup Cluster Aggregation Script
-
-  For data analytics with Apache Pig, there are some additional environment setup. Apache Pig does not use the same environment variable name as Hadoop, therefore make sure the following environment are setup correctly:
-
-  [[1]] Download and setup Apache Pig 0.9.1.
-
-  [[2]] Define Apache Pig class path:
-
----
-export PIG_CLASSPATH=$HADOOP_CONF_DIR:$HBASE_CONF_DIR
----
-
-  [[3]] Create a jar file of HBASE_CONF_DIR, run:
-
----
-jar cf $CHUKWA_HOME/hbase-env.jar $HBASE_CONF_DIR
----
-
-  [[4]] Setup a cron job or Hudson job for analytics script to run periodically:
-
----
-pig -Dpig.additional.jars=${HBASE_HOME}/hbase-${hbaseVersion}.jar:${HBASE_HOME}/lib/zookeeper-${zookeeperVersion}.jar:${PIG_PATH}/pig.jar:${CHUKWA_HOME}/hbase-env.jar ${CHUKWA_HOME}/script/pig/ClusterSummary.pig
+bin/solr start -cloud -z localhost:2181
+./bin/solr create_collection -c chukwa -n chukwa
 ---
 
 * Start HICC

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/apt/datamodel.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/datamodel.apt b/src/site/apt/datamodel.apt
index 7f44c78..c0dd928 100644
--- a/src/site/apt/datamodel.apt
+++ b/src/site/apt/datamodel.apt
@@ -57,7 +57,7 @@ HBase Schema
 
 * Metrics
 
-  Chukwa table stores time series data.
+  <chukwa> table stores time series data.
 
 ** Row Key
 
@@ -89,9 +89,9 @@ for the same source.
 
 * Metadata
 
-  Metadata is designed to store point lookup data.  For example, small amount of 
-data to describe the metric name mapping for chukwa table.  It is also used to store
-JSON blob of dashboard data.
+  <chukwa_metadata> table is designed to store point lookup data.  For example, small 
+amount of data to describe the metric name mapping for the chukwa table.  It is also used to 
+store JSON blobs of dashboard data.
 
 ** Row Key
 

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/apt/pipeline.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/pipeline.apt b/src/site/apt/pipeline.apt
index 274e596..1b4dd1c 100644
--- a/src/site/apt/pipeline.apt
+++ b/src/site/apt/pipeline.apt
@@ -119,6 +119,26 @@ key value pairs to HBase table.  HBaseWriter has the following configuration:
 </property>
 ---
 
+SolrWriter
+
+  <SolrWriter> writes chunks of data to SolrCloud server.  This writer is 
+designed to write log entries to Solr for full text indexing.  
+SolrWriter can be enabled by <chukwa.pipeline> property in chukwa-agent-conf.xml.
+Solr specific settings are a pointer to the ZooKeeper location used to find the SolrCloud
+leader and the Solr collection in which to store indexed data.
+
+---
+<property>
+  <name>solr.cloud.address</name>
+  <value>localhost:2181</value>
+</property>
+
+<property>
+  <name>solr.collection</name>
+  <value>chukwa</value>
+</property>
+---
+
 LocalWriter
 
   <LocalWriter> writes chunks of data to local disk then upload file to HDFS 

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/apt/programming.apt
----------------------------------------------------------------------
diff --git a/src/site/apt/programming.apt b/src/site/apt/programming.apt
index 641064f..b0d1fee 100644
--- a/src/site/apt/programming.apt
+++ b/src/site/apt/programming.apt
@@ -14,7 +14,7 @@
 ~~ limitations under the License.
 ~~
 
-Chukwa User and Programming Guide
+Chukwa Programmers Guide
 
   At the core of Chukwa is a flexible system for collecting and processing
 monitoring data, particularly log files. This document describes how to use the
@@ -24,198 +24,24 @@ pipeline, see the {{{./design.html}Design Guide}}.)
   In particular, this document discusses the Chukwa archive file formats, the
 demux and archiving mapreduce jobs, and  the layout of the Chukwa storage directories.
 
-Reading data from the sink or the archive
+Agent REST API
 
-  Chukwa gives you several ways of inspecting or processing collected data.
-
-* Dumping some data
-
-  It very often happens that you want to retrieve one or more files that have been
-collected with Chukwa. If the total volume of data to be recovered is not too
-great, you can use <bin/chukwa dumpArchive>, a command-line tool that does the job.
-The <dump> tool does an in-memory sort of the data, so you'll be 
-constrained by the Java heap size (typically a few hundred MB).
-
-  The <dump> tool takes a search pattern as its first argument, followed
-by a list of files or file-globs.  It will then print the contents of every data
-stream in those files that matches the pattern. (A data stream is a sequence of
-chunks with the same host, source, and datatype.)  Data is printed in order,
-with duplicates removed.  No metadata is printed.  Separate streams are 
-separated by a row of dashes.  
-
-  For example, the following command will dump all data from every file that
-matches the glob pattern.  Note the use of single quotes to pass glob patterns
-through to the application, preventing the shell from expanding them.
-
----
-$CHUKWA_HOME/bin/chukwa dumpArchive 'datatype=.*' 'hdfs://host:9000/chukwa/archive/*.arc'
----
-
-  The patterns used by <dump> are based on normal regular 
-expressions. They are of the form <field1=regex&field2=regex>.
-That is, they are a sequence of rules, separated by ampersand signs. Each rule
-is of the form <metadatafield=regex>, where 
-<metadatafield> is one of the Chukwa metadata fields, and 
-<regex> is a regular expression.  The valid metadata field names are:
-<datatype>, <host>, <cluster>, 
-<content>, <name>.  Note that the <name> field matches the stream name -- often the filename
-that the data was extracted from.
-
-  In addition, you can match arbitrary tags via <tags.tagname>.
-So for instance, to match chunks with tag <foo="bar"> you could say
-<tags.foo=bar>. Note that quotes are present in the tag, but not
-in the filter rule.
-
-  A stream matches the search pattern only if every rule matches. So to 
-retrieve HadoopLog data from cluster foo, you might search for 
-<cluster=foo&datatype=HadoopLog>.
-
-* Exploring the Sink or Archive
-
-  Another common task is finding out what data has been collected. Chukwa offers
-a specialized tool for this purpose: <DumpArchive>. This tool has
-two modes: summarize and verbose, with the latter being the default.
-
-  In summarize mode, <DumpArchive> prints a count of chunks in each
-data stream.  In verbose mode, the chunks themselves are dumped.
-
-  You can invoke the tool by running <$CHUKWA_HOME/bin/dumpArchive.sh>.
-To specify summarize mode, pass <--summarize> as the first argument.
-
----
-bin/chukwa dumpArchive --summarize 'hdfs://host:9000/chukwa/logs/*.done'
----
-
-* Using MapReduce
-
-  A key goal of Chukwa was to facilitate MapReduce processing of collected data.
-The next section discusses the file formats.  An understanding of MapReduce
-and SequenceFiles is helpful in understanding the material.
-
-Sink File Format
-
-  As data is collected, Chukwa dumps it into <sink files> in HDFS. By
-default, these are located in <hdfs:///chukwa/logs>.  If the file name 
-ends in .chukwa, that means the file is still being written to. Every few minutes, 
-the agent will close the file, and rename the file to '*.done'.  This 
-marks the file as available for processing.
-
-  Each sink file is a Hadoop sequence file, containing a succession of 
-key-value pairs, and periodic synch markers to facilitate MapReduce access. 
-They key type is <ChukwaArchiveKey>; the value type is 
-<ChunkImpl>. See the Chukwa Javadoc for details about these classes.
-
-  Data in the sink may include duplicate and omitted chunks.
-
-Demux and Archiving
-
-  It's possible to write MapReduce jobs that directly examine the data sink, 
-but it's not extremely convenient. Data is not organized in a useful way, so 
-jobs will likely discard most of their input. Data quality is imperfect, since 
-duplicates and omissions may exist.  And MapReduce and HDFS are optimized to 
-deal with a modest number of large files, not many small ones.
-
-  Chukwa therefore supplies several MapReduce jobs for organizing collected 
-data and putting it into a more useful form; these jobs are typically run 
-regularly from cron.  Knowing how to use Chukwa-collected data requires 
-understanding how these jobs lay out storage. For now, this document only 
-discusses one such job: the Simple Archiver.
-
-Simple Archiver
-
-  The simple archiver is designed to consolidate a large number of data sink 
-files into a small number of archive files, with the contents grouped in a 
-useful way.  Archive files, like raw sink files, are in Hadoop sequence file 
-format. Unlike the data sink, however, duplicates have been removed.  (Future 
-versions of the Simple Archiver will indicate the presence of gaps.)
-
-  The simple archiver moves every <.done> file out of the sink, and 
-then runs a MapReduce job to group the data. Output Chunks will be placed into 
-files with names of the form 
-<hdfs:///chukwa/archive/clustername/Datatype_date.arc>.  
-Date corresponds to when the data was collected; Datatype is the datatype of 
-each Chunk. 
-
-  If archived data corresponds to an existing filename, a new file will be 
-created with a disambiguating suffix.
+  Chukwa Agent offers a programmable API to control Agent adaptors for collecting data from
+remote sources, or to set up a listening port for incoming data streams.  Usage guide and
+examples are documented in {{{./apidocs/agent-rest.html} Agent REST API doc}}.
 
 Demux
 
-  A key use for Chukwa is processing arriving data, in parallel, using MapReduce.
+  A key use for Chukwa is processing arriving data, in parallel, using Chukwa Demux.
 The most common way to do this is using the Chukwa demux framework.
-As {{{./dataflow.html}data flows through Chukwa}}, the demux job is often the
-first job that runs.
+As {{{./design.html}data flows through Chukwa}}, the demux parsers are often the
+first user defined function to process data.
 
   By default, Chukwa will use the default TsProcessor. This parser will try to
 extract the real log statement from the log entry using the ISO8601 date 
 format. If it fails, it will use the time at which the chunk was written to
 disk (agent timestamp).
 
-* Writing a custom demux Mapper
-
-  If you want to extract some specific information and perform more processing you
-need to write your own parser. Like any M/R program, your have to write at least
-the Map side for your parser. The reduce side is Identity by default.
-
-  On the Map side,you can write your own parser from scratch or extend the AbstractProcessor class
-that hides all the low level action on the chunk. See
-<org.apache.hadoop.chukwa.extraction.demux.processor.mapper.Df> for an example
-of a Map class for use with Demux.
- 
-  For Chukwa to invoke your Mapper code, you have
-to specify which data types it should run on.
-Edit <${CHUKWA_HOME}/etc/chukwa/chukwa-demux-conf.xml> and add the following lines:
-
----
-<property>
-    <name>MyDataType</name>
-    <value>org.apache.hadoop.chukwa.extraction.demux.processor.mapper.MyParser</value>
-    <description>Parser class for MyDataType.</description>
-</property>
----
-
-  You can use the same parser for several different recordTypes.
-
-* Writing a custom reduce
-
-  You only need to implement a reduce side if you need to group records together. 
-The interface that your need to implement is <ReduceProcessor>:
-
----
-public interface ReduceProcessor
-{
-           public String getDataType();
-           public void process(ChukwaRecordKey key,Iterator<ChukwaRecord> values,
-                               OutputCollector<ChukwaRecordKey, ChukwaRecord> output, 
-                               Reporter reporter);
-}
----
-
-  The link between the Map side and the reduce is done by setting your reduce class
-into the reduce type: <key.setReduceType("MyReduceClass");>
-Note that in the current version of Chukwa, your class needs to be in the package
-<org.apache.hadoop.chukwa.extraction.demux.processor>
-See <org.apache.hadoop.chukwa.extraction.demux.processor.reducer.SystemMetrics>
-for an example of a Demux reducer.
-
-* Output
-
-  Your data is going to be sorted by RecordType then by the key field. The default
-implementation use the following grouping for all records:
-
-  * Time partition (Time up to the hour)
-
-  * Machine name (physical input source)
-
-  * Record timestamp
-
-  The demux process will use the recordType to save similar records together 
-(same recordType) to the same directory: 
-
----
-<cluster name>/<record type>/
----
-
 * Demux Data To HBase
 
   Demux parsers can be configured to run in <${CHUKWA_HOME}/etc/chukwa/chukwa-demux-conf.xml>.  See 
@@ -254,204 +80,11 @@ public class SystemMetrics extends AbstractProcessor {
   In this example, the data collected by SystemMetrics parser is stored into <"SystemMetrics">
 HBase table, in the <"cpu"> column family.
 
+HICC REST API
 
-Create a new HICC widget
-
-  HICC Widget is composed of a JSON data model.  Examples of widget descriptor 
-is located at <src/main/web/hicc/descriptors>.  The data structure looks like
-this:
-
----
-{
-  "id":"debug",
-  "title":"Session Debugger",
-  "version":"0.1",
-  "categories":"Developer,Utilities",
-  "url":"jsp/debug.jsp",
-  "description":"Display session stats",
-  "refresh":"15",
-  "parameters":[
-    {"name":"height","type":"string","value":"0","edit":"0"}
-  ]
-}
----
-
-  * <<id>> - Unique identifier of HICC widget.
-
-  * <<title>> - Human readable string for display on widget border.
-
-  * <<version>> - Version number of the widget, used for updating dashboard
-    with new version of the widget.
-
-  * <<categories>> - Category to organize the widget.  The categories hierarchy
-    is separated by comma.
-
-  * <<url>> - The URL to fetch widget content.  Use /iframe/ as prefix to
-    sandbox output of the URL in a iframe.
-
-  * <<description>> - Description of the widget to display on widget browser.
-
-  * <<refresh>> - Predefined interval to refresh widget in minutes, set refresh 
-    to 0 to disable periodical refresh.
-
-  * <<parameters>> - A list of Key Value parameters to pass to <<url>>.
-    Parameters can be constructed from the following datatypes.
-
-    [[1]] <<string>> - A text field for entering string. <edit> set the text 
-          field to be hidden and value is constant.  Example:
-
----
-{
-  "name":"height",
-  "type":"string",
-  "value":"0",
-  "edit":"0"
-}
----
-
-    [[2]] <<select>> - A drop down list for making single item selection. 
-          <label> is text string next to the drop down box.  Example:
-
----
-{
-  "name":"width",
-  "type":"select",
-  "value":"300",
-  "label":"Width",
-  "options":[
-    {"label":"300","value":"300"},
-      ...
-    {"label":"1200","value":"1200"}
-  ]
-}
----
-
-    [[3]] <<select_callback>> - Single item selection box with data source
-          provided from the <callback> url.  Example:
-
----
-{
-  "name":"time_zone",
-  "type":"select_callback",
-  "value":"UTC",
-  "label":"Time Zone",
-  "callback":"/hicc/jsp/get_timezone_list.jsp"
-}
----
-
-    [[4]] <<select_multiple>> - Multiple item selection box.
-
----
-{
-  "name":"data",
-  "type":"select_multiple",
-  "value":"default",
-  "label":"Metric",
-  "options":[
-    {"label":"Selection 1","value":"1"},
-    {"label":"Selection 2","value":"2"}
-  ]
-}
----
-
-    [[5]] <<radio>> - Radio button for making boolean selection.
-
----
-{
-  "name":"legend",
-  "type":"radio",
-  "value":"on",
-  "label":"Show Legends",
-  "options":[
-    {"label":"On","value":"on"},
-    {"label":"Off","value":"off"}
-  ]
-}
----
-
-    [[6]] <<custom>> - Custom Javascript control.  <control> is a javascript
-          function defined in <src/main/web/hicc/js/workspace/custom_edits.js>.
-
----
-{
-  "name":"period",
-  "type":"custom",
-  "control":"period_control",
-  "value":"",
-  "label":"Period"
-}
----
-
-HICC Metrics REST API
-
-  HICC metrics API is designed to run HBase scan function.  One thing to
-keep in mind is that the down sampling framework has not been built.  Therefore,
-scanning a large number of metrics on HBase may take a long time.
-
-  * Retrieve a time series metrics for a given column and use session key
-    as row key.
-
----
-/hicc/v1/metrics/series/{table}/{column}/session/{sessionKey}?start={long}&end
-={long}&fullScan={boolean}
----
-
-  * Retrieve a time series metrics for a given column and given row key.
-
----
-/hicc/v1/metrics/series/{table}/{family}/{column}/rowkey/{rkey}?start={long}&end={long}&fullScan={boolean}
----
-
-  * Scan for column names within a column family.
-
----
-/hicc/v1/metrics/schema{table}/{family}?start={long}&end={long}&fullScan={boolean}
----
-
-  * Scan table for unique row names.
-
----
-/hicc/v1/metrics/rowkey/{table}/{family}/{column}?start={long}&end={long}&fullScan={boolean}
----
-
-HICC Charting API
-
-  HICC Chart.jsp is the generic interface for piping HICC metrics REST API
-JSON to javascript rendered charting library.  The supported options are:
-
-  * <<title>> - Display a title string on chart.
-
-  * <<width>> - Width of the chart in pixels.
-
-  * <<height>> - Height of the chart in pixels.
-
-  * <<render>> - Type of graph to display.  Available options are:
-    <line>, <bar>, <point>, <area>, <stack-area>.
-
-  * <<series_name>> - Label for series name.
-
-  * <<data>> - URL to retrieve series of JSON data.
-
-  * <<x_label>> - Toggle to display X axis label (<on> or <off>).
-
-  * <<x_axis_label>> - A string to display on X axis of the chart.
-
-  * <<y_label>> - Toggle to display Y axis label (<on> or <off>).
-
-  * <<ymin>> - Y axis minimum value.
-
-  * <<ymax>> or <<y_axis_max>> - Y axis maximum value.
-
-  * <<legend>> - Toggle to display legend for the chart (<on> or <off>).
-
-[]
-
-  Example of using Charting API in combination with HICC Metrics REST API:
-
----
-http://localhost:4080/hicc/jsp/chart.jsp?width=300&height=200&data=/hicc/v1/metrics/series/ClusterSummary/memory:UsedPercent/session/cluster,/hicc/v1/metrics/series/ClusterSummary/memory:FreePercent/session/cluster&title=Memory%20Utilization
----
+  HICC visualization API offers a simple API to compose dashboards and charting widgets.
+Data visualization API offers features for end user to interact with data in the 
+final product format.  They are designed to display and summarize data for human
+interaction.  HICC usage guide and examples are documented in 
+{{{./apidocs/hicc-rest.html} HICC REST API doc}}.
 
-  In this example, the width of the chart is set to 300, and height set to 200.  The chart has a 
-title of <Memory Utilization>, and streaming data from <ClusterSummary> table for column family
-<memory> with <UsedPercent> and <FreePercent> metrics using session key <cluster> as the row key.

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/apt/user.apt.vm
----------------------------------------------------------------------
diff --git a/src/site/apt/user.apt.vm b/src/site/apt/user.apt.vm
new file mode 100644
index 0000000..6c2a790
--- /dev/null
+++ b/src/site/apt/user.apt.vm
@@ -0,0 +1,260 @@
+~~ Licensed to the Apache Software Foundation (ASF) under one or more
+~~ contributor license agreements.  See the NOTICE file distributed with
+~~ this work for additional information regarding copyright ownership.
+~~ The ASF licenses this file to You under the Apache License, Version 2.0
+~~ (the "License"); you may not use this file except in compliance with
+~~ the License.  You may obtain a copy of the License at
+~~
+~~     http://www.apache.org/licenses/LICENSE-2.0
+~~
+~~ Unless required by applicable law or agreed to in writing, software
+~~ distributed under the License is distributed on an "AS IS" BASIS,
+~~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+~~ See the License for the specific language governing permissions and
+~~ limitations under the License.
+~~
+
+Chukwa User Guide
+
+  This chapter is the detailed guide to Chukwa configuration.
+
+  Please read this chapter carefully and ensure that all requirements have 
+been satisfied. Failure to do so will cause you (and us) grief debugging 
+strange errors and/or data loss.
+
+  Chukwa uses the same configuration system as Hadoop. To configure a deploy, 
+edit a file of environment variables in etc/chukwa/chukwa-env.sh -- this 
+configuration is used mostly by the launcher shell scripts getting the 
+cluster off the ground -- and then add configuration to an XML file to do 
+things like override Chukwa defaults, tell Chukwa what Filesystem to use, 
+or the location of the HBase configuration.
+
+  When running in distributed mode, after you make an edit to a Chukwa 
+configuration, make sure you copy the content of the conf directory to all 
+nodes of the cluster. Chukwa will not do this for you. Use rsync.
+
+Pre-requisites
+
+  Chukwa should work on any POSIX platform, but GNU/Linux is the only
+production platform that has been tested extensively. Chukwa has also been used
+successfully on Mac OS X, which several members of the Chukwa team use for 
+development.
+
+  The only absolute software requirements are Java 1.6 or better,
+ZooKeeper {{${zookeeperVersion}}}, HBase {{${hbaseVersion}}} and Hadoop {{${hadoopVersion}}}.
+  
+  The Chukwa cluster management scripts rely on <ssh>; these scripts, however,
+are not required if you have some alternate mechanism for starting and stopping
+daemons.
+
+Installing Chukwa
+
+  A minimal Chukwa deployment has four components:
+
+  * A Hadoop and HBase cluster on which Chukwa will process data (referred to as the Chukwa cluster).
+
+  * One or more agent processes, that send monitoring data to HBase.
+    The nodes with active agent processes are referred to as the monitored 
+    source nodes.
+
+  * Data analytics scripts that summarize Hadoop cluster health.
+
+  * HICC, the Chukwa visualization tool.
+
+[]
+
+[./images/chukwa_architecture.png] Chukwa Components
+
+* First Steps
+
+  * Obtain a copy of Chukwa. You can find the latest release on the 
+    {{{http://hadoop.apache.org/chukwa/releases.html} Chukwa release page}}.
+
+  * Un-tar the release, via <tar xzf>.
+
+  * Make sure a copy of Chukwa is available on each node being monitored.
+
+  * We refer to the directory containing Chukwa as <CHUKWA_HOME>. It may
+be helpful to set <CHUKWA_HOME> explicitly in your environment,
+but Chukwa does not require that you do so.
+
+* General Configuration
+
+  * Make sure that <JAVA_HOME> is set correctly and points to a Java 1.6 JRE. 
+It's generally best to set this in <CHUKWA_HOME/etc/chukwa/chukwa-env.sh>.
+
+  * In <CHUKWA_HOME/etc/chukwa/chukwa-env.sh>, set <CHUKWA_LOG_DIR> and
+<CHUKWA_PID_DIR> to the directories where Chukwa should store its
+console logs and pid files.  The pid directory must not be shared between
+different Chukwa instances: it should be local, not NFS-mounted.
+
+  * Optionally, set CHUKWA_IDENT_STRING. This string is
+ used to name Chukwa's own console log files.
+
+Agents
+
+  Agents are the Chukwa processes that actually produce data. This section
+describes how to configure and run them. More details are available in the
+{{{./agent.html} Agent configuration guide}}.
+
+* Configuration
+
+  First, edit <$CHUKWA_HOME/etc/chukwa/chukwa-env.sh>. In addition to 
+the general directions given above, you should set <HADOOP_CONF_DIR> and
+<HBASE_CONF_DIR>.  This should be the Hadoop deployment Chukwa will use to 
+store collected data.  You will get a version mismatch error if this is 
+configured incorrectly.
+
+  Edit the <CHUKWA_HOME/etc/chukwa/initial_adaptors> configuration file. 
+This is where you tell Chukwa what log files to monitor. See
+{{{./agent.html#Adaptors} the adaptor configuration guide}} for
+a list of available adaptors.
+
+  There are a number of optional settings in 
+<$CHUKWA_HOME/etc/chukwa/chukwa-agent-conf.xml>:
+
+  * The most important of these is the cluster/group name that identifies the
+monitored source nodes. This value is stored in each Chunk of collected data;
+you can therefore use it to distinguish data coming from different groups of 
+machines.
+
+---
+  <property>
+    <name>chukwaAgent.tags</name>
+    <value>cluster="demo"</value>
+    <description>The cluster's name for this agent</description>
+  </property>
+---
+
+  * Another important option is <chukwaAgent.checkpoint.dir>.
+This is the directory Chukwa will use for its periodic checkpoints of 
+running adaptors.  It <<must not>> be a shared directory; use a local, 
+not NFS-mounted, directory.
+
+  * Setting the option <chukwaAgent.control.remote> will disallow remote 
+connections to the agent control socket.
+
+** Use HBase For Data Storage
+
+  * Configuring the pipeline: set HBaseWriter as your writer, or add it 
+    to the pipeline if you are using <PipelineConnector>:
+
+---
+  <property>
+    <name>chukwa.agent.connector</name>
+    <value>org.apache.hadoop.chukwa.datacollection.connector.PipelineConnector</value>
+  </property>
+
+  <property>
+    <name>chukwa.pipeline</name>
+    <value>org.apache.hadoop.chukwa.datacollection.writer.hbase.HBaseWriter</value>
+  </property>
+---
+
+** Use HDFS For Data Storage
+
+  The one mandatory configuration parameter is <writer.hdfs.filesystem>.
+This should be set to the HDFS root URL on which Chukwa will store data.
+Various optional configuration options are described in 
+{{{./pipeline.html} the pipeline configuration guide}}.
+
+* Starting, Stopping, And Monitoring
+
+  To run an agent process on a single node, use:
+
+---
+  sbin/chukwa-daemon.sh start agent
+---
+
+  Typically, agents run as daemons. The script <bin/start-agents.sh> 
+will ssh to each machine listed in <etc/chukwa/agents> and start an agent,
+running in the background. The script <bin/stop-agents.sh> 
+does the reverse.
+
+  You can, of course, use any other daemon-management system you like. 
+For instance, <tools/init.d> includes init scripts for running
+Chukwa agents.
+
+  To check if an agent is working properly, you can telnet to the control
+port (9093 by default) and hit "enter". You will get a status message if
+the agent is running normally.
+
+Configuring Hadoop For Monitoring
+
+  One of the key goals for Chukwa is to collect logs from Hadoop clusters. 
+This section describes how to configure Hadoop to send its logs to Chukwa. 
+Note that these directions require Hadoop 0.20.205.0+.  Earlier versions of 
+Hadoop do not have the hooks that Chukwa requires in order to grab 
+MapReduce job logs.
+
+  The Hadoop configuration files are located in <HADOOP_HOME/etc/hadoop>.
+To setup Chukwa to collect logs from Hadoop, you need to change some of the 
+Hadoop configuration files.
+
+  * Copy CHUKWA_HOME/etc/chukwa/hadoop-log4j.properties file to HADOOP_CONF_DIR/log4j.properties
+
+  * Copy CHUKWA_HOME/etc/chukwa/hadoop-metrics2.properties file to HADOOP_CONF_DIR/hadoop-metrics2.properties
+
+  * Edit HADOOP_HOME/etc/hadoop/hadoop-metrics2.properties file and change $CHUKWA_LOG_DIR to your actual CHUKWA log directory (i.e., CHUKWA_HOME/var/log)
+
+Setup HBase Table
+
+  Chukwa is moving towards a model of using HBase to store metrics data to 
+allow real-time charting. This section describes how to configure HBase and 
+HICC to work together.
+
+  * Presently, we support HBase 0.96+. If you have older HBase jars anywhere, 
+they will cause linkage errors.  Check for and remove them.
+
+  * Setting up tables:
+
+---
+  hbase/bin/hbase shell < etc/chukwa/hbase.schema
+---
+
+HICC
+
+* Configuration
+
+  Edit <etc/chukwa/auth.conf> and add authorized users to the list.
+
+* Starting, Stopping, And Monitoring
+
+  The Hadoop Infrastructure Care Center (HICC) is the Chukwa web user interface.
+HICC is started by invoking
+
+---
+  sbin/chukwa-daemon.sh start hicc
+---
+
+  Once the webcontainer with HICC has been started, point your favorite 
+browser to:
+
+---
+  http://<server>:4080/hicc
+---
+
+Troubleshooting Tips
+
+* UNIX Processes For Chukwa Data Processes
+
+  Chukwa Data Processors are identified by:
+
+---
+  org.apache.hadoop.chukwa.datacollection.agent.ChukwaAgent
+  org.apache.hadoop.chukwa.hicc.HiccWebServer
+---
+
+  The processes are scheduled for execution; therefore, they are not always 
+visible from the process list.
+
+* Emergency Shutdown Procedure
+
+  If the system is not functioning properly and you cannot find an answer in 
+the Administration Guide, execute the kill command.  The current state of 
+the java process will be written to the log files. You can analyze 
+these files to determine the cause of the problem.
+
+---
+kill -3 <pid>
+---

http://git-wip-us.apache.org/repos/asf/chukwa/blob/84ade828/src/site/site.xml
----------------------------------------------------------------------
diff --git a/src/site/site.xml b/src/site/site.xml
index a5b29a4..25b246d 100644
--- a/src/site/site.xml
+++ b/src/site/site.xml
@@ -49,6 +49,7 @@
       </item>
       <item name="Programming Guide" href="programming.html">
         <item name="Agent REST API" href="apidocs/agent-rest.html"/>
+        <item name="HICC REST API" href="apidocs/hicc-rest.html"/>
         <item name="Javadocs" href="apidocs/index.html"/>
       </item>
       <item name="Architecture" href="design.html">


Mime
View raw message