From: hao@apache.org
To: commits@eagle.incubator.apache.org
Date: Wed, 19 Oct 2016 03:27:54 -0000
Subject: [39/50] incubator-eagle git commit: [EAGLE-615] Jsoup parse hive sql return String without line break "\n"

[EAGLE-615] Jsoup parse hive sql return String without line break "\n"

EAGLE-615 Jsoup parse hive sql return String without line break "\n"
 - Add "doc.outputSettings().prettyPrint(false);" and read the element value via getWholeText() instead of text()

Author: chitin

Closes #499 from chitin/EAGLE615.
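For context, a minimal standalone sketch (not part of the patch; the WholeTextDemo class name and the inline HTML snippet are made up for illustration) of the Jsoup behavior the fix relies on: Element.text() normalizes whitespace and drops the query's line breaks, TextNode.getWholeText() returns the raw text with "\n" intact, and outputSettings().prettyPrint(false) keeps Jsoup from re-formatting the document when it is serialized.

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;

public class WholeTextDemo {
    public static void main(String[] args) {
        // Stand-in for the job conf page; the real spout reads table[id=conf]
        // from the MapReduce job configuration page.
        String html = "<table id=\"conf\"><tbody><tr>"
                + "<td>hive.query.string</td>"
                + "<td>SELECT a,\n       b\nFROM t</td>"
                + "</tr></tbody></table>";

        Document doc = Jsoup.parse(html);
        // Keep Jsoup from re-indenting/re-wrapping text when the document is output.
        doc.outputSettings().prettyPrint(false);

        Element valueCell = doc.select("table[id=conf] tbody tr td").get(1);

        // text() collapses whitespace, so the "\n" inside the query is lost.
        System.out.println("text():      " + valueCell.text());

        // getWholeText() preserves the original text of the node, line breaks included
        // (the patch additionally trims it with StringUtils.strip()).
        for (Node child : valueCell.childNodes()) {
            if (child instanceof TextNode) {
                System.out.println("wholeText(): " + ((TextNode) child).getWholeText());
            }
        }
    }
}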
Project: http://git-wip-us.apache.org/repos/asf/incubator-eagle/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-eagle/commit/a710082d
Tree: http://git-wip-us.apache.org/repos/asf/incubator-eagle/tree/a710082d
Diff: http://git-wip-us.apache.org/repos/asf/incubator-eagle/diff/a710082d

Branch: refs/heads/master
Commit: a710082d486e10b4732c00a06dd367dc556df60a
Parents: a6bc0a5
Author: chitin
Authored: Mon Oct 17 11:42:27 2016 +0800
Committer: Hao Chen
Committed: Mon Oct 17 11:42:27 2016 +0800

----------------------------------------------------------------------
 .../hive/jobrunning/HiveJobFetchSpout.java | 35 +++++++++++++++-----
 1 file changed, 26 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-eagle/blob/a710082d/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
----------------------------------------------------------------------
diff --git a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
index c0673b3..af4599b 100644
--- a/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
+++ b/eagle-security/eagle-security-hive/src/main/java/org/apache/eagle/security/hive/jobrunning/HiveJobFetchSpout.java
@@ -22,6 +22,7 @@ import backtype.storm.task.TopologyContext;
 import backtype.storm.topology.OutputFieldsDeclarer;
 import backtype.storm.topology.base.BaseRichSpout;
 import backtype.storm.tuple.Fields;
+import org.apache.commons.lang.StringUtils;
 import org.apache.eagle.dataproc.impl.storm.ValuesArray;
 import org.apache.eagle.jpm.util.*;
 import org.apache.eagle.jpm.util.jobrecover.RunningJobManager;
@@ -35,12 +36,14 @@ import org.apache.eagle.security.hive.config.RunningJobCrawlConfig;
 import org.codehaus.jackson.JsonParser;
 import org.codehaus.jackson.map.ObjectMapper;
 import org.jsoup.Jsoup;
+import org.jsoup.nodes.TextNode;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.w3c.dom.Document;
 import org.w3c.dom.Element;
 import org.w3c.dom.Node;
 import org.w3c.dom.NodeList;
+
 import javax.xml.parsers.DocumentBuilder;
 import javax.xml.parsers.DocumentBuilderFactory;
 import java.io.InputStream;
@@ -62,6 +65,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
     private Long lastFinishAppTime;
     private RunningJobManager runningJobManager;
     private int partitionId;
+
     static {
         OBJ_MAPPER.configure(JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS, true);
     }
@@ -91,22 +95,22 @@ public class HiveJobFetchSpout extends BaseRichSpout {
         // sanity verify 0<=partitionId<=numTotalPartitions-1
         if (partitionId < 0 || partitionId > crawlConfig.controlConfig.numTotalPartitions) {
             throw new IllegalStateException("partitionId should be less than numTotalPartitions with partitionId " +
-                partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions);
+                    partitionId + " and numTotalPartitions " + crawlConfig.controlConfig.numTotalPartitions);
         }
         Class partitionerCls = crawlConfig.controlConfig.partitionerCls;
         try {
             this.jobFilter = new JobIdFilterByPartition(partitionerCls.newInstance(),
-                crawlConfig.controlConfig.numTotalPartitions, partitionId);
+                    crawlConfig.controlConfig.numTotalPartitions, partitionId);
         } catch (Exception e) {
             LOG.error("failing instantiating job partitioner class " + partitionerCls.getCanonicalName());
             throw new IllegalStateException(e);
         }
         this.collector = collector;
         this.runningJobManager = new RunningJobManager(crawlConfig.zkStateConfig.zkQuorum,
-            crawlConfig.zkStateConfig.zkSessionTimeoutMs,
-            crawlConfig.zkStateConfig.zkRetryTimes,
-            crawlConfig.zkStateConfig.zkRetryInterval,
-            crawlConfig.zkStateConfig.zkRoot);
+                crawlConfig.zkStateConfig.zkSessionTimeoutMs,
+                crawlConfig.zkStateConfig.zkRetryTimes,
+                crawlConfig.zkStateConfig.zkRetryInterval,
+                crawlConfig.zkStateConfig.zkRoot);
         this.lastFinishAppTime = this.runningJobManager.recoverLastFinishedTime(partitionId);
         if (this.lastFinishAppTime == 0l) {
             this.lastFinishAppTime = Calendar.getInstance().getTimeInMillis() - 24 * 60 * 60000l;//one day ago
@@ -119,7 +123,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
         LOG.info("start to fetch job list");
         try {
             List apps = rmResourceFetcher.getResource(Constants.ResourceType.RUNNING_MR_JOB);
-            if(apps == null){
+            if (apps == null) {
                 apps = new ArrayList<>();
             }
             handleApps(apps, true);
@@ -127,7 +131,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
             long fetchTime = Calendar.getInstance().getTimeInMillis();
             if (fetchTime - this.lastFinishAppTime > 60000l) {
                 apps = rmResourceFetcher.getResource(Constants.ResourceType.COMPLETE_MR_JOB, Long.toString(this.lastFinishAppTime));
-                if(apps == null){
+                if (apps == null) {
                     apps = new ArrayList<>();
                 }
                 handleApps(apps, false);
@@ -230,6 +234,7 @@ public class HiveJobFetchSpout extends BaseRichSpout {
             LOG.info("fetch job conf from {}", urlString);
             is = InputStreamUtils.getInputStream(urlString, null, Constants.CompressionType.NONE);
             final org.jsoup.nodes.Document doc = Jsoup.parse(is, "UTF-8", urlString);
+            doc.outputSettings().prettyPrint(false);
             org.jsoup.select.Elements elements = doc.select("table[id=conf]").select("tbody").select("tr");
             Map hiveQueryLog = new HashMap<>();
             Iterator iter = elements.iterator();
@@ -237,7 +242,19 @@ public class HiveJobFetchSpout extends BaseRichSpout {
                 org.jsoup.nodes.Element element = iter.next();
                 org.jsoup.select.Elements tds = element.children();
                 String key = tds.get(0).text();
-                String value = tds.get(1).text();
+                String value = "";
+                org.jsoup.nodes.Element valueElement = tds.get(1);
+                if (Constants.HIVE_QUERY_STRING.equals(key)) {
+                    for (org.jsoup.nodes.Node child : valueElement.childNodes()) {
+                        if (child instanceof TextNode) {
+                            TextNode valueTextNode = (TextNode) child;
+                            value = valueTextNode.getWholeText();
+                            value = StringUtils.strip(value);
+                        }
+                    }
+                } else {
+                    value = valueElement.text();
+                }
                 hiveQueryLog.put(key, value);
             }
             if (hiveQueryLog.containsKey(Constants.HIVE_QUERY_STRING)) {