flume-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From rgo...@apache.org
Subject svn commit: r1351228 [4/4] - in /incubator/flume/branches/flume-1262: ./ flume-ng-channels/flume-file-channel/src/site/ flume-ng-channels/flume-jdbc-channel/src/site/ flume-ng-channels/flume-recoverable-memory-channel/src/site/ flume-ng-channels/src/ f...
Date Mon, 18 Jun 2012 08:21:21 GMT
Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/log4jappender.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/log4jappender.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/log4jappender.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/log4jappender.xml Mon Jun
18 08:21:19 2012
@@ -0,0 +1,86 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="log4j-appender" id="log4j-appender"/>
+      <subsection name="Log4J Appender">
+        <p>Appends Log4j events to a flume agent's avro source. A client using this
+          appender must have the flume-ng-sdk in the classpath (e.g.,
+          flume-ng-sdk-1.2.0-incubating-SNAPSHOT.jar).
+          Required properties are in <b>bold</b>.
+        </p>
+        <table border="1">
+          <colgroup>
+            <col width="13%"/>
+            <col width="7%"/>
+            <col width="74%"/>
+          </colgroup>
+          <thead>
+            <tr>
+              <th>
+                <p>Property Name</p>
+              </th>
+              <th>
+                <p>Default</p>
+              </th>
+              <th>
+                <p>Description</p>
+              </th>
+            </tr>
+          </thead>
+          <tbody>
+            <tr>
+              <td>
+                <p>Hostname</p>
+              </td>
+              <td>
+                <p>--</p>
+              </td>
+              <td>
+                <p>The hostname on which a remote Flume agent is running with an avro
source.</p>
+              </td>
+            </tr>
+            <tr>
+              <td>
+                <p>Port</p>
+              </td>
+              <td>
+                <p>--</p>
+              </td>
+              <td>
+                <p>The port at which the remote Flume agent's avro source is listening.</p>
+              </td>
+            </tr>
+          </tbody>
+        </table>
+        <p>Sample log4j.properties file:</p>
+        <source>
+          #...
+          log4j.appender.flume = org.apache.flume.clients.log4jappender.Log4jAppender
+          log4j.appender.flume.Hostname = example.com
+          log4j.appender.flume.Port = 41414
+
+          # configure a class's logger to output to the flume appender
+          log4j.logger.org.example.MyClass = DEBUG,flume
+          #...
+        </source>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file

Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/monitoring.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/monitoring.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/monitoring.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/monitoring.xml Mon Jun 18
08:21:19 2012
@@ -0,0 +1,27 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="monitoring" id="monitoring"/>
+      <subsection name="Monitoring">
+        <p>TBD</p>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file

Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/overview.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/overview.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/overview.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/overview.xml Mon Jun 18 08:21:19
2012
@@ -0,0 +1,96 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="introduction" id="introduction"/>
+      <subsection name="Introduction">
+        <a name="overview" id="overview"/>
+        <h4>Overview</h4>
+        <p>Apache Flume is a distributed, reliable, and available system for efficiently
+          collecting, aggregating and moving large amounts of log data from many
+          different sources to a centralized data store.
+        </p>
+        <p>At the moment Flume is an incubating Apache project. There are currently
two
+          release code lines available, version 0.9.x and 1.x.x. This guide is specific
+          to 1.x (more specifically the 1.1.0 release). Please click here for <a
+              href="http://archive.cloudera.com/cdh/3/flume/UserGuide/" id="">the Flume
+            0.9.x User Guide</a>.
+        </p>
+        <a name="system-requirements" id="system-requirements"/>
+        <h4>System Requirements</h4>
+        <p>TBD</p>
+        <a name="architecture" id="architecture"/>
+        <h4>Architecture</h4>
+        <a name="data-flow-model" id="data-flow-model"/>
+        <h5>Data flow model</h5>
+        <p>A Flume event is defined as a unit of data flow having a byte payload and
an
+          optional set of string attributes. A Flume agent is a (JVM) process that hosts
+          the components through which events flow from an external source to the next
+          destination (hop).
+        </p>
+        <img alt="Agent component diagram" src="../images/UserGuide_image00.png"/>
+        <p>A Flume source consumes events delivered to it by an external source like
a web
+          server. The external source sends events to Flume in a format that is
+          recognized by the target Flume source. For example, an Avro Flume source can be
+          used to receive Avro events from Avro clients or other Flume agents in the flow
+          that send events from an Avro sink. When a Flume source receives an event, it
+          stores it into one or more channels. The channel is a passive store that keeps
+          the event until it's consumed by a Flume sink. The JDBC channel is one example
+          -- it uses a filesystem backed embedded database. The sink removes the event
+          from the channel and puts it into an external repository like HDFS (via Flume
+          HDFS sink) or forwards it to the Flume source of the next Flume agent (next
+          hop) in the flow. The source and sink within the given agent run asynchronously
+          with the events staged in the channel.
+        </p>
+        <a name="complex-flows" id="complex-flows"/>
+        <h5>Complex flows</h5>
+        <p>Flume allows a user to build multi-hop flows where events travel through
+          multiple agents before reaching the final destination. It also allows fan-in
+          and fan-out flows, contextual routing and backup routes (fail-over) for failed
+          hops.
+        </p>
+        <a name="reliability" id="reliability"/>
+        <h5>Reliability</h5>
+        <p>The events are staged in a channel on each agent. The events are then delivered
+          to the next agent or terminal repository (like HDFS) in the flow. The events
+          are removed from a channel only after they are stored in the channel of the next
+          agent or in the terminal repository. This is how the single-hop message
+          delivery semantics in Flume provide end-to-end reliability of the flow.
+        </p>
+        <p>Flume uses a transactional approach to guarantee the reliable delivery of
the
+          events. The sources and sinks encapsulate in a transaction the
+          storage/retrieval, respectively, of the events placed in or provided by a
+          transaction provided by the channel. This ensures that the set of events is
+          reliably passed from point to point in the flow. In the case of a multi-hop
+          flow, the sink from the previous hop and the source from the next hop both have
+          their transactions running to ensure that the data is safely stored in the
+          channel of the next hop.
+        </p>
+        <a name="recoverability" id="recoverability"/>
+        <h5>Recoverability</h5>
+        <p>The events are staged in the channel, which manages recovery from failure.
+          Flume supports a durable JDBC channel which is backed by a relational database.
+          There's also a memory channel which simply stores the events in an in-memory
+          queue, which is faster but any events still left in the memory channel when an
+          agent process dies can't be recovered.
+        </p>
+      </subsection>
+   </section>
+  </body>
+</document>
\ No newline at end of file

Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/security.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/security.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/security.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/security.xml Mon Jun 18 08:21:19
2012
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="security" id="security"/>
+      <subsection name="Security">
+        <p>The HDFS sink supports Kerberos authentication if the underlying HDFS is
+          running in secure mode. Please refer to the HDFS Sink section for
+          configuring the HDFS sink Kerberos-related options.
+        </p>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file

Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/setup.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/setup.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/setup.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/setup.xml Mon Jun 18 08:21:19
2012
@@ -0,0 +1,144 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="setup" id="setup"/>
+      <subsection name="Setup">
+        <a name="setting-up-an-agent" id="setting-up-an-agent"/>
+        <h4>Setting up an agent</h4>
+        <p>Flume agent configuration is stored in a local configuration file. This
is a
+          text file which follows the Java properties file format.
+          Configurations for one or more agents can be specified in the same
+          configuration file. The configuration file includes properties of each source,
+          sink and channel in an agent and how they are wired together to form data
+          flows.
+        </p>
+        <a name="configuring-individual-components" id="configuring-individual-components"/>
+        <h5>Configuring individual components</h5>
+        <p>Each component (source, sink or channel) in the flow has a name, type, and
set
+          of properties that are specific to the type and instantiation. For example, an
+          Avro source needs a hostname (or IP address) and a port number to receive data
+          from. A memory channel can have max queue size ("capacity"), and an HDFS sink
+          needs to know the file system URI, path to create files, frequency of file
+          rotation ("hdfs.rollInterval") etc. All such attributes of a component need to
+          be set in the properties file of the hosting Flume agent.
+        </p>
+        <a name="wiring-the-pieces-together" id="wiring-the-pieces-together"/>
+        <h5>Wiring the pieces together</h5>
+        <p>The agent needs to know what individual components to load and how they
are
+          connected in order to constitute the flow. This is done by listing the names of
+          each of the sources, sinks and channels in the agent, and then specifying the
+          connecting channel for each sink and source. For example, an agent flows events
+          from an Avro source called avroWeb to HDFS sink hdfs-cluster1 via a JDBC
+          channel called jdbc-channel. The configuration file will contain names of these
+          components and jdbc-channel as a shared channel for both avroWeb source and
+          hdfs-cluster1 sink.
+        </p>
+        <a name="starting-an-agent" id="starting-an-agent"/>
+        <h5>Starting an agent</h5>
+        <p>An agent is started using a shell script called flume-ng which is located
in
+          the bin directory of the Flume distribution. You need to specify the agent
+          name, the config directory, and the config file on the command line:
+        </p>
+        <pre class="literal_block">$ bin/flume-ng agent -n agent -c conf -f conf/flume-conf.properties.template</pre>
+        <p>Now the agent will start running sources and sinks configured in the given
+          properties file.
+        </p>
+        <a name="data-ingestion" id="data-ingestion"/>
+        <h4>Data ingestion</h4>
+        <p>Flume supports a number of mechanisms to ingest data from external sources.</p>
+        <a name="rpc" id="rpc"/>
+        <h5>RPC</h5>
+        <p>An Avro client included in the Flume distribution can send a given file
to
+          Flume Avro source using avro RPC mechanism:
+        </p>
+        <pre class="literal_block">$ bin/flume-ng avro-client -H localhost -p 41414 -F /usr/logs/log.10</pre>
+        <p>The above command will send the contents of /usr/logs/log.10 to the Flume
+          source listening on that port.
+        </p>
+        <a name="executing-commands" id="executing-commands"/>
+        <h5>Executing commands</h5>
+        <p>There's an exec source that executes a given command and consumes the output.
A
+          single 'line' of output is text followed by carriage return ('\r') or line
+          feed ('\n') or both together.
+        </p>
+        <div class="note">
+          <p class="title">note :</p>
+          <p class="body">
+            <p>Flume does not support tail as a source. One can wrap the tail command
in an exec source to stream the
+              file.
+            </p>
+          </p>
+        </div>
+        <a name="network-streams" id="network-streams"/>
+        <h5>Network streams</h5>
+        <p>Flume supports the following mechanisms to read data from popular log stream
+          types, such as:
+        </p>
+        <ol type="1">
+          <li>
+            <p>Avro</p>
+          </li>
+          <li>
+            <p>Syslog</p>
+          </li>
+          <li>
+            <p>Netcat</p>
+          </li>
+        </ol>
+        <a name="setting-multi-agent-flow" id="setting-multi-agent-flow"/>
+        <h4>Setting multi-agent flow</h4>
+        <img alt="Two agents communicating over Avro RPC" src="../images/UserGuide_image03.png"/>
+        <p>In order to flow the data across multiple agents or hops, the sink of the
+          previous agent and source of the current hop need to be avro type with the sink
+          pointing to the hostname (or IP address) and port of the source.
+        </p>
+        <a name="consolidation" id="consolidation"/>
+        <h4>Consolidation</h4>
+        <p>A very common scenario in log collection is a large number of log producing
+          clients sending data to a few consumer agents that are attached to the storage
+          subsystem. For example, logs collected from hundreds of web servers are sent to a
+          dozen agents that write to an HDFS cluster.
+        </p>
+        <img alt="A fan-in flow using Avro RPC to consolidate events in one place" src="../images/UserGuide_image02.png"/>
+        <p>This can be achieved in Flume by configuring a number of first tier agents
with
+          an avro sink, all pointing to an avro source of a single agent. This source on
+          the second tier agent consolidates the received events into a single channel
+          which is consumed by a sink to its final destination.
+        </p>
+        <a name="multiplexing-the-flow" id="multiplexing-the-flow"/>
+        <h4>Multiplexing the flow</h4>
+        <p>Flume supports multiplexing the event flow to one or more destinations.
This is
+          achieved by defining a flow multiplexer that can replicate or selectively route
+          an event to one or more channels.
+        </p>
+        <img alt="A fan-out flow using a (multiplexing) channel selector" src="../images/UserGuide_image01.png"/>
+        <p>The above example shows a source from agent "foo" fanning out the flow to
three
+          different channels. This fan out can be replicating or multiplexing. In case of
+          replicating flow, each event is sent to all three channels. For the
+          multiplexing case, an event is delivered to a subset of available channels when
+          an event's attribute matches a preconfigured value. For example, if an event
+          attribute called "txnType" is set to "customer", then it should go to channel1
+          and channel3, if it's "vendor" then it should go to channel2, otherwise
+          channel3. The mapping can be set in the agent's configuration file.
+        </p>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file

Added: incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/troubleshoot.xml
URL: http://svn.apache.org/viewvc/incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/troubleshoot.xml?rev=1351228&view=auto
==============================================================================
--- incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/troubleshoot.xml (added)
+++ incubator/flume/branches/flume-1262/src/site/xdoc/usersGuide/troubleshoot.xml Mon Jun
18 08:21:19 2012
@@ -0,0 +1,53 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document xmlns="http://www.w3.org/TR/xhtml1/strict">
+  <properties>
+    <title>Flume 1.x User Guide</title>
+  </properties>
+  <body>
+    <section name="Flume 1.x User Guide"><!-- Licensed to the Apache Software Foundation
(ASF) under one or more
+contributor license agreements.  See the NOTICE file distributed with
+this work for additional information regarding copyright ownership.
+The ASF licenses this file to You under the Apache License, Version 2.0
+(the "License"); you may not use this file except in compliance with
+the License.  You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. -->
+      <a name="troubleshooting" id="troubleshooting"/>
+      <subsection name="Troubleshooting">
+        <a name="handling-agent-failures" id="handling-agent-failures"/>
+        <h4>Handling agent failures</h4>
+        <p>If the Flume agent goes down then all the flows hosted on that agent
are
+          aborted. Once the agent is restarted, the flows will resume. A flow using
+          jdbc or another stable channel will resume processing events where it left
+          off. If the agent can't be restarted on the same hardware, then there is an option to
+          migrate the database to other hardware and set up a new Flume agent that
+          can resume processing the events saved in the db. The database HA features
+          can be leveraged to move the Flume agent to another host.
+        </p>
+        <a name="compatibility" id="compatibility"/>
+        <h4>Compatibility</h4>
+        <a name="hdfs" id="hdfs"/>
+        <h5>HDFS</h5>
+        <p>Currently Flume supports HDFS 0.20.2 and 0.23.</p>
+        <a name="avro" id="avro"/>
+        <h5>AVRO</h5>
+        <p>TBD</p>
+        <a name="additional-version-requirements" id="additional-version-requirements"/>
+        <h5>Additional version requirements</h5>
+        <p>TBD</p>
+        <a name="tracing" id="tracing"/>
+        <h4>Tracing</h4>
+        <p>TBD</p>
+        <a name="more-sample-configs" id="more-sample-configs"/>
+        <h4>More Sample Configs</h4>
+        <p>TBD</p>
+      </subsection>
+    </section>
+  </body>
+</document>
\ No newline at end of file



Mime
View raw message