Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 29261200B99 for ; Wed, 5 Oct 2016 17:24:39 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 27D8D160ADE; Wed, 5 Oct 2016 15:24:39 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 105D1160ADB for ; Wed, 5 Oct 2016 17:24:37 +0200 (CEST) Received: (qmail 63997 invoked by uid 500); 5 Oct 2016 15:24:37 -0000 Mailing-List: contact common-commits-help@hadoop.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Delivered-To: mailing list common-commits@hadoop.apache.org Received: (qmail 63988 invoked by uid 99); 5 Oct 2016 15:24:37 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 05 Oct 2016 15:24:37 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 035FDDFB81; Wed, 5 Oct 2016 15:24:37 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jlowe@apache.org To: common-commits@hadoop.apache.org Message-Id: <76c8a121f01e4e3eb9dacc91a205f0dc@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: hadoop git commit: MAPREDUCE-6741. Add MR support to redact job conf properties. Contributed by Haibo Chen Date: Wed, 5 Oct 2016 15:24:37 +0000 (UTC) archived-at: Wed, 05 Oct 2016 15:24:39 -0000 Repository: hadoop Updated Branches: refs/heads/branch-2.8 f66863f84 -> f1b74a3d9 MAPREDUCE-6741. Add MR support to redact job conf properties. 
Contributed by Haibo Chen Project: http://git-wip-us.apache.org/repos/asf/hadoop/repo Commit: http://git-wip-us.apache.org/repos/asf/hadoop/commit/f1b74a3d Tree: http://git-wip-us.apache.org/repos/asf/hadoop/tree/f1b74a3d Diff: http://git-wip-us.apache.org/repos/asf/hadoop/diff/f1b74a3d Branch: refs/heads/branch-2.8 Commit: f1b74a3d9ff71bc014dbfd29a6996071b81d14c5 Parents: f66863f Author: Jason Lowe Authored: Wed Oct 5 15:24:10 2016 +0000 Committer: Jason Lowe Committed: Wed Oct 5 15:24:10 2016 +0000 ---------------------------------------------------------------------- .../jobhistory/JobHistoryEventHandler.java | 19 +++--- .../mapreduce/v2/app/webapp/dao/ConfInfo.java | 4 +- .../jobhistory/TestJobHistoryEventHandler.java | 71 ++++++++++++++++++++ .../mapreduce/v2/app/webapp/TestBlocks.java | 10 ++- .../apache/hadoop/mapreduce/MRJobConfig.java | 4 ++ .../hadoop/mapreduce/util/MRJobConfUtil.java | 45 +++++++++++++ .../src/main/resources/mapred-default.xml | 7 ++ 7 files changed, 147 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java index d9025b8..d36462f 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java +++ 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/jobhistory/JobHistoryEventHandler.java @@ -50,6 +50,7 @@ import org.apache.hadoop.mapreduce.JobCounter; import org.apache.hadoop.mapreduce.MRJobConfig; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.util.MRJobConfUtil; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.api.records.JobState; import org.apache.hadoop.mapreduce.v2.app.AppContext; @@ -492,16 +493,16 @@ public class JobHistoryEventHandler extends AbstractService if (conf != null) { // TODO Ideally this should be written out to the job dir // (.staging/jobid/files - RecoveryService will need to be patched) - FSDataOutputStream jobFileOut = null; - try { - if (logDirConfPath != null) { - jobFileOut = stagingDirFS.create(logDirConfPath, true); - conf.writeXml(jobFileOut); - jobFileOut.close(); + if (logDirConfPath != null) { + Configuration redactedConf = new Configuration(conf); + MRJobConfUtil.redact(redactedConf); + try (FSDataOutputStream jobFileOut = stagingDirFS + .create(logDirConfPath, true)) { + redactedConf.writeXml(jobFileOut); + } catch (IOException e) { + LOG.info("Failed to write the job configuration file", e); + throw e; } - } catch (IOException e) { - LOG.info("Failed to write the job configuration file", e); - throw e; } } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java 
b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java index a05c317..287ab99 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/main/java/org/apache/hadoop/mapreduce/v2/app/webapp/dao/ConfInfo.java @@ -26,8 +26,7 @@ import javax.xml.bind.annotation.XmlAccessorType; import javax.xml.bind.annotation.XmlRootElement; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileContext; -import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.util.MRJobConfUtil; import org.apache.hadoop.mapreduce.v2.app.job.Job; @XmlRootElement(name = "conf") @@ -45,6 +44,7 @@ public class ConfInfo { this.property = new ArrayList(); Configuration jobConf = job.loadConfFile(); this.path = job.getConfFile().toString(); + MRJobConfUtil.redact(jobConf); for (Map.Entry entry : jobConf) { this.property.add(new ConfEntryInfo(entry.getKey(), entry.getValue(), jobConf.getPropertySources(entry.getKey()))); http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java index 8ca386e..d1a25b0 100644 --- 
a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/jobhistory/TestJobHistoryEventHandler.java @@ -29,6 +29,7 @@ import static org.mockito.Mockito.when; import java.io.File; import java.io.FileOutputStream; +import java.io.InputStream; import java.io.IOException; import java.util.HashMap; @@ -51,6 +52,7 @@ import org.apache.hadoop.mapreduce.TaskAttemptID; import org.apache.hadoop.mapreduce.TaskID; import org.apache.hadoop.mapreduce.TaskType; import org.apache.hadoop.mapreduce.TypeConverter; +import org.apache.hadoop.mapreduce.util.MRJobConfUtil; import org.apache.hadoop.mapreduce.v2.api.records.JobId; import org.apache.hadoop.mapreduce.v2.app.AppContext; import org.apache.hadoop.mapreduce.v2.app.job.Job; @@ -369,6 +371,74 @@ public class TestJobHistoryEventHandler { } } + @Test + public void testPropertyRedactionForJHS() throws Exception { + final Configuration conf = new Configuration(); + + String sensitivePropertyName = "aws.fake.credentials.name"; + String sensitivePropertyValue = "aws.fake.credentials.val"; + conf.set(sensitivePropertyName, sensitivePropertyValue); + conf.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES, + sensitivePropertyName); + conf.set(CommonConfigurationKeysPublic.FS_DEFAULT_NAME_KEY, + dfsCluster.getURI().toString()); + final TestParams params = new TestParams(); + conf.set(MRJobConfig.MR_AM_STAGING_DIR, params.dfsWorkDir); + + final JHEvenHandlerForTest jheh = + new JHEvenHandlerForTest(params.mockAppContext, 0, false); + + try { + jheh.init(conf); + jheh.start(); + handleEvent(jheh, new JobHistoryEvent(params.jobId, + new AMStartedEvent(params.appAttemptId, 200, params.containerId, + "nmhost", 3000, 4000, -1))); + handleEvent(jheh, new JobHistoryEvent(params.jobId, + new 
JobUnsuccessfulCompletionEvent(TypeConverter.fromYarn( + params.jobId), 0, 0, 0, JobStateInternal.FAILED.toString()))); + + // verify the value of the sensitive property in job.xml is restored. + Assert.assertEquals(sensitivePropertyName + " is modified.", + conf.get(sensitivePropertyName), sensitivePropertyValue); + + // load the job_conf.xml in JHS directory and verify property redaction. + Path jhsJobConfFile = getJobConfInIntermediateDoneDir(conf, params.jobId); + Assert.assertTrue("The job_conf.xml file is not in the JHS directory", + FileContext.getFileContext(conf).util().exists(jhsJobConfFile)); + Configuration jhsJobConf = new Configuration(); + + try (InputStream input = FileSystem.get(conf).open(jhsJobConfFile)) { + jhsJobConf.addResource(input); + Assert.assertEquals( + sensitivePropertyName + " is not redacted in HDFS.", + MRJobConfUtil.REDACTION_REPLACEMENT_VAL, + jhsJobConf.get(sensitivePropertyName)); + } + } finally { + jheh.stop(); + purgeHdfsHistoryIntermediateDoneDirectory(conf); + } + } + + private static Path getJobConfInIntermediateDoneDir(Configuration conf, + JobId jobId) throws IOException { + Path userDoneDir = new Path( + JobHistoryUtils.getHistoryIntermediateDoneDirForUser(conf)); + Path doneDirPrefix = + FileContext.getFileContext(conf).makeQualified(userDoneDir); + return new Path( + doneDirPrefix, JobHistoryUtils.getIntermediateConfFileName(jobId)); + } + + private void purgeHdfsHistoryIntermediateDoneDirectory(Configuration conf) + throws IOException { + FileSystem fs = FileSystem.get(dfsCluster.getConfiguration(0)); + String intermDoneDirPrefix = + JobHistoryUtils.getConfiguredHistoryIntermediateDoneDirPrefix(conf); + fs.delete(new Path(intermDoneDirPrefix), true); + } + @Test (timeout=50000) public void testDefaultFsIsUsedForHistory() throws Exception { // Create default configuration pointing to the minicluster @@ -410,6 +480,7 @@ public class TestJobHistoryEventHandler { localFileSystem.exists(new Path(t.dfsWorkDir))); } finally 
{ jheh.stop(); + purgeHdfsHistoryIntermediateDoneDirectory(conf); } } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java index 3876fe8..9eda977 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/src/test/java/org/apache/hadoop/mapreduce/v2/app/webapp/TestBlocks.java @@ -23,6 +23,8 @@ import java.io.PrintWriter; import java.util.HashMap; import java.util.Map; +import org.apache.hadoop.mapreduce.MRJobConfig; +import org.apache.hadoop.mapreduce.util.MRJobConfUtil; import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; @@ -64,6 +66,9 @@ public class TestBlocks { Path path = new Path("conf"); Configuration configuration = new Configuration(); configuration.set("Key for test", "Value for test"); + final String redactedProp = "Key for redaction"; + configuration.set(MRJobConfig.MR_JOB_REDACTED_PROPERTIES, + redactedProp); when(job.getConfFile()).thenReturn(path); when(job.loadConfFile()).thenReturn(configuration); @@ -84,9 +89,10 @@ public class TestBlocks { configurationBlock.render(html); pWriter.flush(); assertTrue(data.toString().contains("Key for test")); - assertTrue(data.toString().contains("Value for test")); - + assertTrue(data.toString().contains(redactedProp)); + 
assertTrue(data.toString().contains( + MRJobConfUtil.REDACTION_REPLACEMENT_VAL)); } /** http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java index 45033ff..8d460da 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/MRJobConfig.java @@ -942,4 +942,8 @@ public interface MRJobConfig { public static final int DEFAULT_MR_ENCRYPTED_INTERMEDIATE_DATA_BUFFER_KB = 128; + /** + * A comma-separated list of properties whose value will be redacted. 
+ */ + String MR_JOB_REDACTED_PROPERTIES = "mapreduce.job.redacted-properties"; } http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java new file mode 100644 index 0000000..11d49a4 --- /dev/null +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/util/MRJobConfUtil.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.mapreduce.util; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.mapreduce.MRJobConfig; + +/** + * A class that contains utility methods for MR Job configuration. 
+ */ +public final class MRJobConfUtil { + public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)"; + + /** + * Redact job configuration properties. + * @param conf the job configuration to redact + */ + public static void redact(final Configuration conf) { + for (String prop : conf.getTrimmedStringCollection( + MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) { + conf.set(prop, REDACTION_REPLACEMENT_VAL); + } + } + + /** + * There is no reason to instantiate this utility class. + */ + private MRJobConfUtil() { + } +} http://git-wip-us.apache.org/repos/asf/hadoop/blob/f1b74a3d/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml ---------------------------------------------------------------------- diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml index 914826c..0ca8495 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/resources/mapred-default.xml @@ -1834,4 +1834,11 @@ default is -1 +<property> + <description> + The list of job configuration properties whose value will be redacted. + </description> + <name>mapreduce.job.redacted-properties</name> + <value></value> +</property> --------------------------------------------------------------------- To unsubscribe, e-mail: common-commits-unsubscribe@hadoop.apache.org For additional commands, e-mail: common-commits-help@hadoop.apache.org