Return-Path: X-Original-To: apmail-gora-commits-archive@www.apache.org Delivered-To: apmail-gora-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id C0E0510620 for ; Thu, 3 Sep 2015 07:28:38 +0000 (UTC) Received: (qmail 22776 invoked by uid 500); 3 Sep 2015 07:28:38 -0000 Delivered-To: apmail-gora-commits-archive@gora.apache.org Received: (qmail 22703 invoked by uid 500); 3 Sep 2015 07:28:38 -0000 Mailing-List: contact commits-help@gora.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@gora.apache.org Delivered-To: mailing list commits@gora.apache.org Received: (qmail 22087 invoked by uid 99); 3 Sep 2015 07:28:38 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 03 Sep 2015 07:28:38 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 1ABABE7E15; Thu, 3 Sep 2015 07:28:38 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: lewismc@apache.org To: commits@gora.apache.org Date: Thu, 03 Sep 2015 07:28:44 -0000 Message-Id: <8ed002f82ffa4b898532540725c8f539@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [08/25] gora git commit: * Writing to Hbase via Spark is implemented. * Writing to Hbase via Spark is implemented. 
Project: http://git-wip-us.apache.org/repos/asf/gora/repo Commit: http://git-wip-us.apache.org/repos/asf/gora/commit/c111e629 Tree: http://git-wip-us.apache.org/repos/asf/gora/tree/c111e629 Diff: http://git-wip-us.apache.org/repos/asf/gora/diff/c111e629 Branch: refs/heads/master Commit: c111e6290fa16d2cc560eb29dbc07eb2f8b7734b Parents: 80c0c26 Author: Furkan KAMACI Authored: Wed Jul 15 20:23:51 2015 +0300 Committer: Furkan KAMACI Committed: Wed Jul 15 20:23:51 2015 +0300 ---------------------------------------------------------------------- .../gora/tutorial/log/LogAnalyticsSpark.java | 35 ++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/gora/blob/c111e629/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java ---------------------------------------------------------------------- diff --git a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java index abced3f..828939e 100644 --- a/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java +++ b/gora-tutorial/src/main/java/org/apache/gora/tutorial/log/LogAnalyticsSpark.java @@ -19,12 +19,16 @@ package org.apache.gora.tutorial.log; import java.util.Map; +import org.apache.gora.mapreduce.GoraMapReduceUtils; +import org.apache.gora.mapreduce.GoraOutputFormat; +import org.apache.gora.persistency.Persistent; import org.apache.gora.spark.GoraSparkEngine; import org.apache.gora.store.DataStore; import org.apache.gora.store.DataStoreFactory; import org.apache.gora.tutorial.log.generated.MetricDatum; import org.apache.gora.tutorial.log.generated.Pageview; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.Job; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import 
org.apache.spark.api.java.JavaRDD; @@ -135,6 +139,8 @@ public class LogAnalyticsSpark { "Gora Integration Application").setMaster("local"); // todo _fk consider alternative architectural design + // todo design the inStore and outStore initialization parts the same way as in LogAnalytics.java + // todo consider creating the job and manipulating it at the input part, as in LogAnalytics.java Class[] c = new Class[1]; c[0] = Pageview.class; sparkConf.registerKryoClasses(c); @@ -143,10 +149,10 @@ public class LogAnalyticsSpark { Configuration hadoopConf = new Configuration(); - DataStore dataStore = DataStoreFactory.getDataStore( + DataStore inStore = DataStoreFactory.getDataStore( inStoreClass, Long.class, Pageview.class, hadoopConf); - JavaPairRDD goraRDD = goraSparkEngine.initialize(sc, dataStore); + JavaPairRDD goraRDD = goraSparkEngine.initialize(sc, inStore); long count = goraRDD.count(); System.out.println("Total Log Count: " + count); @@ -162,10 +168,35 @@ public class LogAnalyticsSpark { System.out.println("MetricDatum count:" + reducedGoraRdd.count()); + //print screen output Map metricDatumMap = reducedGoraRdd.collectAsMap(); for (String key : metricDatumMap.keySet()) { System.out.println(key); } + // + + //write output to datastore + DataStore outStore = DataStoreFactory.getDataStore( + outStoreClass, String.class, MetricDatum.class, hadoopConf); + + GoraMapReduceUtils.setIOSerializations(hadoopConf, true); + + Job job = Job.getInstance(hadoopConf); + job.setOutputFormatClass(GoraOutputFormat.class); + job.setOutputKeyClass(outStore.getKeyClass()); + job.setOutputValueClass(outStore.getPersistentClass()); + + job.getConfiguration().setClass(GoraOutputFormat.DATA_STORE_CLASS, outStore.getClass(), + DataStore.class); + job.getConfiguration().setClass(GoraOutputFormat.OUTPUT_KEY_CLASS, outStore.getKeyClass(), Object.class); + job.getConfiguration().setClass(GoraOutputFormat.OUTPUT_VALUE_CLASS, + outStore.getPersistentClass(), Persistent.class); + + 
reducedGoraRdd.saveAsNewAPIHadoopDataset(job.getConfiguration()); + // + + inStore.close(); + outStore.close(); return 1; }