accumulo-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ctubb...@apache.org
Subject [1/2] accumulo-website git commit: Add post about running on Fedora 25
Date Mon, 19 Dec 2016 23:44:26 GMT
Repository: accumulo-website
Updated Branches:
  refs/heads/asf-site b866c1545 -> ac9e7868f
  refs/heads/master 62b91e017 -> f261f85ad


Add post about running on Fedora 25


Project: http://git-wip-us.apache.org/repos/asf/accumulo-website/repo
Commit: http://git-wip-us.apache.org/repos/asf/accumulo-website/commit/f261f85a
Tree: http://git-wip-us.apache.org/repos/asf/accumulo-website/tree/f261f85a
Diff: http://git-wip-us.apache.org/repos/asf/accumulo-website/diff/f261f85a

Branch: refs/heads/master
Commit: f261f85ad67bde330fe0bd6a262d73b2b53b9197
Parents: 62b91e0
Author: Christopher Tubbs <ctubbsii@apache.org>
Authored: Wed Dec 14 21:10:45 2016 -0500
Committer: Christopher Tubbs <ctubbsii@apache.org>
Committed: Mon Dec 19 18:39:11 2016 -0500

----------------------------------------------------------------------
 Gemfile                                        |   2 +-
 Gemfile.lock                                   |  89 ++++-
 _posts/blog/2016-12-19-running-on-fedora-25.md | 372 ++++++++++++++++++++
 3 files changed, 445 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/f261f85a/Gemfile
----------------------------------------------------------------------
diff --git a/Gemfile b/Gemfile
index 3d5739a..368a482 100644
--- a/Gemfile
+++ b/Gemfile
@@ -1,3 +1,3 @@
 source 'https://rubygems.org'
-gem 'github-pages', '104', group: :jekyll_plugins
+gem 'github-pages', '110', group: :jekyll_plugins
 gem 'therubyracer'

http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/f261f85a/Gemfile.lock
----------------------------------------------------------------------
diff --git a/Gemfile.lock b/Gemfile.lock
index 799212e..4d44dac 100644
--- a/Gemfile.lock
+++ b/Gemfile.lock
@@ -7,36 +7,55 @@ GEM
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
       tzinfo (~> 1.1)
-    addressable (2.4.0)
+    addressable (2.5.0)
+      public_suffix (~> 2.0, >= 2.0.2)
     coffee-script (2.4.1)
       coffee-script-source
       execjs
-    coffee-script-source (1.10.0)
+    coffee-script-source (1.11.1)
     colorator (1.1.0)
-    ethon (0.9.1)
+    ethon (0.10.1)
       ffi (>= 1.3.0)
     execjs (2.7.0)
-    faraday (0.9.2)
+    faraday (0.10.0)
       multipart-post (>= 1.2, < 3)
     ffi (1.9.14)
     forwardable-extended (2.6.0)
     gemoji (2.1.0)
-    github-pages (104)
+    github-pages (110)
       activesupport (= 4.2.7)
-      github-pages-health-check (= 1.2.0)
-      jekyll (= 3.3.0)
+      github-pages-health-check (= 1.3.0)
+      jekyll (= 3.3.1)
       jekyll-avatar (= 0.4.2)
       jekyll-coffeescript (= 1.0.1)
+      jekyll-default-layout (= 0.1.4)
       jekyll-feed (= 0.8.0)
       jekyll-gist (= 1.4.0)
       jekyll-github-metadata (= 2.2.0)
       jekyll-mentions (= 1.2.0)
+      jekyll-optional-front-matter (= 0.1.2)
       jekyll-paginate (= 1.1.0)
+      jekyll-readme-index (= 0.0.3)
       jekyll-redirect-from (= 0.11.0)
+      jekyll-relative-links (= 0.2.1)
       jekyll-sass-converter (= 1.3.0)
       jekyll-seo-tag (= 2.1.0)
       jekyll-sitemap (= 0.12.0)
       jekyll-swiss (= 0.4.0)
+      jekyll-theme-architect (= 0.0.3)
+      jekyll-theme-cayman (= 0.0.3)
+      jekyll-theme-dinky (= 0.0.3)
+      jekyll-theme-hacker (= 0.0.3)
+      jekyll-theme-leap-day (= 0.0.3)
+      jekyll-theme-merlot (= 0.0.3)
+      jekyll-theme-midnight (= 0.0.3)
+      jekyll-theme-minimal (= 0.0.3)
+      jekyll-theme-modernist (= 0.0.3)
+      jekyll-theme-primer (= 0.1.5)
+      jekyll-theme-slate (= 0.0.3)
+      jekyll-theme-tactile (= 0.0.3)
+      jekyll-theme-time-machine (= 0.0.3)
+      jekyll-titles-from-headings (= 0.1.2)
       jemoji (= 0.7.0)
       kramdown (= 1.11.1)
       liquid (= 3.0.6)
@@ -45,17 +64,17 @@ GEM
       minima (= 2.0.0)
       rouge (= 1.11.1)
       terminal-table (~> 1.4)
-    github-pages-health-check (1.2.0)
+    github-pages-health-check (1.3.0)
       addressable (~> 2.3)
       net-dns (~> 0.8)
       octokit (~> 4.0)
-      public_suffix (~> 1.4)
+      public_suffix (~> 2.0)
       typhoeus (~> 0.7)
     html-pipeline (2.4.2)
       activesupport (>= 2)
       nokogiri (>= 1.4)
     i18n (0.7.0)
-    jekyll (3.3.0)
+    jekyll (3.3.1)
       addressable (~> 2.4)
       colorator (~> 1.0)
       jekyll-sass-converter (~> 1.0)
@@ -70,6 +89,8 @@ GEM
       jekyll (~> 3.0)
     jekyll-coffeescript (1.0.1)
       coffee-script (~> 2.2)
+    jekyll-default-layout (0.1.4)
+      jekyll (~> 3.0)
     jekyll-feed (0.8.0)
       jekyll (~> 3.3)
     jekyll-gist (1.4.0)
@@ -81,9 +102,15 @@ GEM
       activesupport (~> 4.0)
       html-pipeline (~> 2.3)
       jekyll (~> 3.0)
+    jekyll-optional-front-matter (0.1.2)
+      jekyll (~> 3.0)
     jekyll-paginate (1.1.0)
+    jekyll-readme-index (0.0.3)
+      jekyll (~> 3.0)
     jekyll-redirect-from (0.11.0)
       jekyll (>= 2.0)
+    jekyll-relative-links (0.2.1)
+      jekyll (~> 3.3)
     jekyll-sass-converter (1.3.0)
       sass (~> 3.2)
     jekyll-seo-tag (2.1.0)
@@ -91,6 +118,34 @@ GEM
     jekyll-sitemap (0.12.0)
       jekyll (~> 3.3)
     jekyll-swiss (0.4.0)
+    jekyll-theme-architect (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-cayman (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-dinky (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-hacker (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-leap-day (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-merlot (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-midnight (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-minimal (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-modernist (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-primer (0.1.5)
+      jekyll (~> 3.3)
+    jekyll-theme-slate (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-tactile (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-theme-time-machine (0.0.3)
+      jekyll (~> 3.3)
+    jekyll-titles-from-headings (0.1.2)
+      jekyll (~> 3.3)
     jekyll-watch (1.5.0)
       listen (~> 3.0, < 3.1)
     jemoji (0.7.0)
@@ -108,16 +163,16 @@ GEM
     mercenary (0.3.6)
     mini_portile2 (2.1.0)
     minima (2.0.0)
-    minitest (5.9.1)
+    minitest (5.10.1)
     multipart-post (2.0.0)
     net-dns (0.8.0)
     nokogiri (1.6.8.1)
       mini_portile2 (~> 2.1.0)
-    octokit (4.6.0)
+    octokit (4.6.2)
       sawyer (~> 0.8.0, >= 0.5.3)
     pathutil (0.14.0)
       forwardable-extended (~> 2.6)
-    public_suffix (1.5.3)
+    public_suffix (2.0.4)
     rb-fsevent (0.9.8)
     rb-inotify (0.9.7)
       ffi (>= 0.5.0)
@@ -125,9 +180,9 @@ GEM
     rouge (1.11.1)
     safe_yaml (1.0.4)
     sass (3.4.22)
-    sawyer (0.8.0)
+    sawyer (0.8.1)
       addressable (>= 2.3.5, < 2.6)
-      faraday (~> 0.8, < 0.10)
+      faraday (~> 0.8, < 1.0)
     terminal-table (1.7.3)
       unicode-display_width (~> 1.1.1)
     therubyracer (0.12.2)
@@ -138,13 +193,13 @@ GEM
       ethon (>= 0.8.0)
     tzinfo (1.2.2)
       thread_safe (~> 0.1)
-    unicode-display_width (1.1.1)
+    unicode-display_width (1.1.2)
 
 PLATFORMS
   ruby
 
 DEPENDENCIES
-  github-pages (= 104)
+  github-pages (= 110)
   therubyracer
 
 BUNDLED WITH

http://git-wip-us.apache.org/repos/asf/accumulo-website/blob/f261f85a/_posts/blog/2016-12-19-running-on-fedora-25.md
----------------------------------------------------------------------
diff --git a/_posts/blog/2016-12-19-running-on-fedora-25.md b/_posts/blog/2016-12-19-running-on-fedora-25.md
new file mode 100644
index 0000000..9e911b7
--- /dev/null
+++ b/_posts/blog/2016-12-19-running-on-fedora-25.md
@@ -0,0 +1,372 @@
+---
+title: "Running Accumulo on Fedora 25"
+author: Christopher Tubbs and Mike Miller
+reviewers: Keith Turner, Mike Walch
+---
+
+Apache Accumulo has been available in [Fedora] since F20. Recently, the Fedora
+packages have been updated to Accumulo version `1.6.6` and include some
+improvements to the default configuration and launch scripts to provide a good
+out-of-box experience. This post will discuss the basic setup procedures for
+running Accumulo in the latest version, `Fedora 25`.
+
+## Prepare the system
+
+**WARNING**: Before you start, be sure you've got plenty of free disk space.
+Otherwise, you could run into this [bug] or see other problems.
+
+These instructions will assume you're using Fedora 25, fully up-to-date (`sudo
+dnf --refresh upgrade`).
+
+### Install packages
+
+Fedora provides a meta-package to install Accumulo and all of its dependencies.
+It's a good idea to install the JDK, so you'll have access to the `jps`
+command, and `tuned` for setting system performance tuning parameters from a
+profile. It's also a good idea to ensure the optional hadoop native libraries
+are installed, and you have a good editor (replace `vim` with your preferred
+editor):
+
+```bash
+sudo dnf install accumulo java-1.8.0-openjdk-devel tuned vim hadoop-common-native
+```
+
+It is possible to install only a specific Accumulo service. For the single node
+setup, almost everything is needed. For the multi-node setup, it might make
+more sense to be selective about which you choose to install on each node (for
+example, to only install `accumulo-tserver`).
+
+### Set up tuned
+
+(Optional) `tuned` can optimize your server settings, adjusting things like
+your `vm.swappiness`. To set up `tuned`, do:
+
+```bash
+sudo systemctl start tuned.service     # start service
+sudo tuned-adm profile network-latency # pick a good profile
+sudo tuned-adm active                  # verify the selected profile
+sudo systemctl enable tuned.service    # auto-start on reboots
+```
+
+### Set up ZooKeeper
+
+You'll need to set up ZooKeeper, regardless of whether you'll be running a
+single node or many. So, let's create its configuration file (the defaults are
+fine):
+
+```bash
+sudo cp /etc/zookeeper/zoo_sample.cfg /etc/zookeeper/zoo.cfg
+```
+
+Now, let's start ZooKeeper (and set it to run on reboot):
+
+```bash
+sudo systemctl start zookeeper.service
+sudo systemctl enable zookeeper.service
+```
+
+Note that the default port for ZooKeeper is `2181`. Remember the hostname of
+the node where ZooKeeper is running, referred to as `<zk-dns-name>` later.
+
+## Running a single node
+
+### Configure Accumulo
+
+To run on a single node, you don't need to run HDFS. Accumulo can use the local
+filesystem as a volume instead. By default, it uses `/tmp/accumulo`. Let's
+change that to something which will survive a reboot:
+
+```bash
+sudo vim /etc/accumulo/accumulo-site.xml
+```
+
+Change the value of the `instance.volumes` property from `file:///tmp/accumulo`
+to `file:///var/tmp/accumulo` in the configuration file (or another preferred
+location).
+
+While you are editing the Accumulo configuration file, you should also change
+the default `instance.secret` from `DEFAULT` to something else. You can also
+change the credentials used by the `tracer` service now, too. If you use the
+`root` user, you'll have to set its password to the same one you'll use later
+when you initialize Accumulo. If you use another user name, you'll have to
+create that user later.
+
+### Configure Hadoop client
+
+Hadoop's default local filesystem handler isn't very good at ensuring files are
+written to disk when services are stopped. So, let's use a better filesystem
+implementation for `file://` locations. This implementation may not be as
+robust as a full HDFS instance, but it's more reliable than the default. Even
+though you're not going to be running HDFS, the Hadoop client code used in
+Accumulo can still be configured by modifying Hadoop's configuration file:
+
+```bash
+sudo vim /etc/hadoop/core-site.xml
+```
+
+Add a new property:
+
+```xml
+  <property>
+    <name>fs.file.impl</name>
+    <value>org.apache.hadoop.fs.RawLocalFileSystem</value>
+  </property>
+```
+
+### Initialize Accumulo
+
+Now, initialize Accumulo. You'll need to do this as the `accumulo` user,
+because the Accumulo services run as the `accumulo` user. This user is created
+automatically by the RPMs if it doesn't exist when the RPMs are installed. If
+you already have a user and/or group by this name, it will probably not be a
+problem, but be aware that this user will have permissions for the server
+configuration files. To initialize Accumulo as a specific user, use `sudo -u`:
+
+```bash
+sudo -u accumulo accumulo init
+```
+
+As expected, this command will fail if ZooKeeper is not running, or if the
+destination volume (`file:///var/tmp/accumulo`) already exists.
+
+### Start Accumulo services
+
+Now that Accumulo is initialized, you can start its services:
+
+```bash
+sudo systemctl start accumulo-{master,tserver,gc,tracer,monitor}.service
+```
+
+Enable the services to start at boot:
+
+```bash
+sudo systemctl enable accumulo-{master,tserver,gc,tracer,monitor}.service
+```
+
+## Running multiple nodes
+
+### Amazon EC2 setup
+
+For a multi-node setup, the authors tested these instructions with a Fedora 25
+Cloud AMI on Amazon EC2 with the following characteristics:
+
+* `us-east-1` region
+* `ami-e5757bf2` (latest in `us-east-1` at time of writing)
+* `HVM` virtualization type
+* `gp2` disk type
+* `64GB EBS` root volume (no additional storage)
+* `m4.large` and `m4.xlarge` instance types (tested on both)
+* `3` nodes
+
+For this setup, you should have a name service configured properly. For
+convenience, we used the EC2 provided internal DNS, with internal IP addresses.
+Make sure the nodes can communicate with each other using these names. If
+you're using EC2, this means making sure they are in the same security group,
+and the security group has an inbound rule for "All traffic" with the source
+set to itself (`sg-xxxxxxxx`).
+
+The default user is `fedora` for the Fedora Cloud AMIs. For the best
+experience, don't forget to make sure they are fully up-to-date (`sudo dnf
+--refresh upgrade`).
+
+### Configure and run Hadoop
+
+Configuring HDFS is the primary difference between the single and multi-node
+setup. For both Hadoop and Accumulo, you can edit the configuration files on
+one machine, and copy them to the others.
+
+Pick a server to be the NameNode and identify its DNS name
+(`<namenode-dns-name>`). Edit Hadoop's configuration to set the default
+filesystem name to this location:
+
+```bash
+sudo vim /etc/hadoop/core-site.xml
+```
+
+Set the value for the property `fs.default.name` to
+`hdfs://<namenode-dns-name>:8020`.
+
+Distribute copies of the changed configuration files to each node.
+
+Now, format the NameNode. You'll need to do this as the `hdfs` user on the
+NameNode instance:
+
+```bash
+sudo -u hdfs hdfs namenode -format
+```
+
+On the NameNode, start the NameNode service and enable it on reboot:
+
+```bash
+sudo systemctl start hadoop-namenode.service
+sudo systemctl enable hadoop-namenode.service
+```
+
+On each DataNode, start the DataNode service and enable it on reboot:
+
+```bash
+sudo systemctl start hadoop-datanode.service
+sudo systemctl enable hadoop-datanode.service
+```
+
+### Configure and run Accumulo
+
+Update Accumulo's configuration to use this HDFS filesystem:
+
+```bash
+sudo vim /etc/accumulo/accumulo-site.xml
+```
+
+Change the value of the `instance.volumes` property to
+`hdfs://<namenode-dns-name>:8020/accumulo` in the configuration file. Don't
+forget to also change the default `instance.secret` and the trace user's
+credentials, if necessary. Also, since you will have multiple nodes, you cannot
+use `localhost:2181` for ZooKeeper, so set `instance.zookeeper.host` to
+`<zk-dns-name>:2181`.
+
+Distribute copies of the changed configuration files to each node.
+
+With HDFS now running, make sure Accumulo has permission to create its
+directory in HDFS, and initialize Accumulo:
+
+```bash
+sudo -u hdfs hdfs dfs -chmod 777 /
+sudo -u accumulo accumulo init
+```
+
+After Accumulo has created its directory structure, you can change the
+permissions for the root back to what they were:
+
+```bash
+sudo -u hdfs hdfs dfs -chmod 755 /
+```
+
+Now, you can start Accumulo.
+
+On the NameNode, start all the Accumulo services and enable on reboot:
+
+```bash
+sudo systemctl start accumulo-{master,tserver,gc,tracer,monitor}.service
+sudo systemctl enable accumulo-{master,tserver,gc,tracer,monitor}.service
+```
+
+On each DataNode, start just the `tserver` and enable it on reboot:
+
+```bash
+sudo systemctl start accumulo-tserver.service
+sudo systemctl enable accumulo-tserver.service
+```
+
+## Watching and using Accumulo
+
+### Run the shell
+
+Run a shell as Accumulo's root user (the instance name and root password are
+the ones you selected during the initialize step above):
+
+```bash
+accumulo shell -u root -zh <zk-dns-name>:2181 -zi <instanceName>
+```
+
+### View the monitor pages
+
+You should also be able to view the NameNode monitor page and the Accumulo
+monitor pages. If you are running this in EC2, you can view these over an SSH
+tunnel using the NameNode's public IP address. If you didn't give this node a
+public IP address, you can allocate one in EC2 and associate it with this node:
+
+```bash
+ssh -L50070:localhost:50070 -L50095:localhost:50095 <user>@<host>
+```
+
+Replace `<user>` with your username (probably `fedora` if using the Fedora
+AMI), and `<host>` with the public IP or hostname for your EC2 instance. Now,
+in your local browser, you should be able to navigate to these addresses in
+your localhost: [Hadoop monitor (http://localhost:50070)][HMon] and [Accumulo
+monitor (http://localhost:50095)][AMon].
+
+## Debugging commands
+
+Check the status of a service:
+
+```bash
+sudo systemctl status <ServiceName>.service
+```
+
+Check running Java processes:
+
+```bash
+sudo jps -ml
+```
+
+Check the system logs for a specific service within the last 10 minutes:
+
+```bash
+sudo journalctl -u <ServiceName> --since '10 minutes ago'
+```
+
+Check listening ports:
+
+```bash
+sudo netstat -tlnp
+```
+
+Check DNS name for a given IP address:
+
+```bash
+getent hosts <ipaddress> # OR
+hostname -A
+```
+
+Perform forward and reverse DNS lookups:
+
+```bash
+sudo dnf install bind-utils
+dig +short <hostname>     # forward DNS lookup
+dig +short -x <ipaddress> # reverse DNS lookup
+```
+
+Find the instance ID for your instance name:
+
+```bash
+zkCli.sh -server <host>:2181     # replace <host> with your ZooKeeper server DNS name
+> get /accumulo/instances/<name> # replace <name> with your instance name
+> quit
+```
+
+If the NameNode is listening on the loopback address, you'll probably need to
+restart the service manually, as well as any Accumulo services which failed.
+This is a [known issue with Hadoop][HBug]:
+
+```bash
+sudo systemctl restart hadoop-namenode.service
+```
+
+Some helpful rpm commands:
+
+```bash
+rpm -q -i <installed-package-name>              # to see info about an installed package
+rpm -q -i -p <rpm-file-name>                    # to see info about an rpm file
+rpm -q --provides <installed-package-name>      # see what a package provides
+rpm -q --requires <installed-package-name>      # see what a package requires
+rpm -q -l <installed-package-name>              # list package files
+rpm -q --whatprovides <file>                    # find rpm which owns <file>
+rpm -q --whatrequires 'mvn(groupId:artifactId)' # find rpm which requires maven coords
+```
+
+## Helping out
+
+Feel free to get involved with the [Fedora][FPackagers] or [Fedora EPEL][EPEL]
+(for RHEL/CentOS users) packaging. Contact the Fedora [maintainers] (user `at`
+fedoraproject `dot` org) for the Accumulo packages to see how you can help by
+patching bugs, adapting the upstream packages to the Fedora packaging
+standards, testing updates, maintaining dependency packages, and more.
+
+[Fedora]: https://getfedora.org/
+[maintainers]: https://admin.fedoraproject.org/pkgdb/package/rpms/accumulo/
+[bug]: https://bugzilla.redhat.com/show_bug.cgi?id=1404888
+[HMon]: http://localhost:50070
+[AMon]: http://localhost:50095
+[HBug]: https://bugzilla.redhat.com/show_bug.cgi?id=1406165
+[EPEL]: https://fedoraproject.org/wiki/EPEL
+[FPackagers]: https://fedoraproject.org/wiki/Join_the_package_collection_maintainers


Mime
View raw message