tez-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ss...@apache.org
Subject [1/4] TEZ-264. Remove MRRExampleHelper. (sseth)
Date Thu, 20 Jun 2013 20:04:56 GMT
Updated Branches:
  refs/heads/master cc15a6b22 -> 5d09d0469


http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/5d09d046/example_jobs/wc_mrr_6m_3r_3r/job.xml_reduce2
----------------------------------------------------------------------
diff --git a/example_jobs/wc_mrr_6m_3r_3r/job.xml_reduce2 b/example_jobs/wc_mrr_6m_3r_3r/job.xml_reduce2
deleted file mode 100644
index 59c5001..0000000
--- a/example_jobs/wc_mrr_6m_3r_3r/job.xml_reduce2
+++ /dev/null
@@ -1,442 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" standalone="no"?><configuration>
-<property><name>mapreduce.job.ubertask.enable</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>yarn.resourcemanager.max-completed-applications</name><value>10000</value><source>yarn-default.xml</source></property>
-<property><name>yarn.resourcemanager.delayed.delegation-token.removal-interval-ms</name><value>30000</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.client.submit.file.replication</name><value>10</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.container-manager.thread-count</name><value>20</value><source>yarn-default.xml</source></property>
-<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value><source>programatically</source></property>
-<property><name>dfs.image.transfer.bandwidthPerSec</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.pmem-check-enabled</name><value>true</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.jobhistory.done-dir</name><value>/mapred/history/done</value><source>mapred-site.xml</source></property>
-<property><name>dfs.block.access.token.lifetime</name><value>600</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.recovery.enabled</name><value>false</value><source>yarn-default.xml</source></property>
-<property><name>fs.AbstractFileSystem.file.impl</name><value>org.apache.hadoop.fs.local.LocalFs</value><source>core-default.xml</source></property>
-<property><name>mapreduce.client.completion.pollinterval</name><value>5000</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.job.ubertask.maxreduces</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.memory.limit.percent</name><value>0.25</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.ssl.keystores.factory.class</name><value>org.apache.hadoop.security.ssl.FileBasedKeyStoresFactory</value><source>core-default.xml</source></property>
-<property><name>mapred.job.name</name><value>word count</value><source>because mapreduce.job.name is deprecated</source></property>
-<property><name>hadoop.http.authentication.kerberos.keytab</name><value>${user.home}/hadoop.keytab</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.keytab</name><value>/etc/krb5.keytab</value><source>yarn-default.xml</source></property>
-<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value><source>core-default.xml</source></property>
-<property><name>s3.blocksize</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>mapreduce.task.io.sort.factor</name><value>10</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.disk-health-checker.interval-ms</name><value>120000</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.working.dir</name><value>hdfs://localhost:8020/user/sseth</value><source>programatically</source></property>
-<property><name>yarn.admin.acl</name><value>*</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.speculative.speculativecap</name><value>0.1</value><source>mapred-default.xml</source></property>
-<property><name>dfs.namenode.num.checkpoints.retained</name><value>2</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.delegation.token.renew-interval</name><value>86400000</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.resource.memory-mb</name><value>4096</value><source>yarn-site.xml</source></property>
-<property><name>io.map.index.interval</name><value>128</value><source>core-default.xml</source></property>
-<property><name>s3.client-write-packet-size</name><value>65536</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.http-address</name><value>0.0.0.0:50070</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.task.files.preserve.failedtasks</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>ha.zookeeper.session-timeout.ms</name><value>5000</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.reduce.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value><source>programatically</source></property>
-<property><name>hadoop.hdfs.configuration.version</name><value>1</value><source>hdfs-default.xml</source></property>
-<property><name>s3.replication</name><value>3</value><source>core-default.xml</source></property>
-<property><name>dfs.datanode.balance.bandwidthPerSec</name><value>1048576</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.connect.timeout</name><value>180000</value><source>mapred-default.xml</source></property>
-<property><name>dfs.journalnode.rpc-address</name><value>0.0.0.0:8485</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.aux-services</name><value>mapreduce.shuffle</value><source>yarn-site.xml</source></property>
-<property><name>hadoop.ssl.enabled</name><value>false</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.counters.max</name><value>120</value><source>mapred-default.xml</source></property>
-<property><name>dfs.datanode.readahead.bytes</name><value>4193404</value><source>hdfs-default.xml</source></property>
-<property><name>ipc.client.connect.max.retries.on.timeouts</name><value>45</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.complete.cancel.delegation.tokens</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>dfs.client.failover.max.attempts</name><value>15</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.checkpoint.dir</name><value>file://${hadoop.tmp.dir}/dfs/namesecondary</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.replication.work.multiplier.per.iteration</name><value>2</value><source>hdfs-default.xml</source></property>
-<property><name>fs.trash.interval</name><value>0</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.admin.address</name><value>${yarn.resourcemanager.hostname}:8033</value><source>yarn-default.xml</source></property>
-<property><name>ha.health-monitor.check-interval.ms</name><value>1000</value><source>core-default.xml</source></property>
-<property><name>hadoop.jetty.logs.serve.aliases</name><value>true</value><source>core-default.xml</source></property>
-<property><name>hadoop.http.authentication.kerberos.principal</name><value>HTTP/_HOST@LOCALHOST</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.reduce.shuffle.consumer.plugin.class</name><value>org.apache.hadoop.mapreduce.task.reduce.Shuffle</value><source>mapred-default.xml</source></property>
-<property><name>s3native.blocksize</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.edits.dir</name><value>${dfs.namenode.name.dir}</value><source>hdfs-default.xml</source></property>
-<property><name>ha.health-monitor.sleep-after-disconnect.ms</name><value>1000</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.log.level</name><value>DEBUG</value><source>mapred-site.xml</source></property>
-<property><name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name><value>1000</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.map.class</name><value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value><source>programatically</source></property>
-<property><name>dfs.encrypt.data.transfer</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.http.address</name><value>0.0.0.0:50075</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.log-aggregation.retain-check-interval-seconds</name><value>-1</value><source>yarn-default.xml</source></property>
-<property><name>dfs.namenode.write.stale.datanode.ratio</name><value>0.5f</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.client.use.datanode.hostname</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.map.cpu.vcores</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>yarn.acl.enable</name><value>true</value><source>yarn-default.xml</source></property>
-<property><name>hadoop.security.instrumentation.requires.admin</name><value>false</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.localizer.fetch.thread-count</name><value>4</value><source>yarn-default.xml</source></property>
-<property><name>hadoop.security.authorization</name><value>false</value><source>core-default.xml</source></property>
-<property><name>dfs.client.failover.connection.retries.on.timeouts</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.security.group.mapping.ldap.search.filter.group</name><value>(objectClass=group)</value><source>core-default.xml</source></property>
-<property><name>mapreduce.output.fileoutputformat.compress.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.shuffle.max.connections</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>dfs.namenode.safemode.extension</name><value>30000</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.shuffle.port</name><value>2233</value><source>yarn-site.xml</source></property>
-<property><name>mapreduce.reduce.log.level</name><value>INFO</value><source>mapred-default.xml</source></property>
-<property><name>yarn.log-aggregation-enable</name><value>false</value><source>yarn-site.xml</source></property>
-<property><name>dfs.datanode.sync.behind.writes</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.https.server.keystore.resource</name><value>ssl-server.xml</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.security.group.mapping.ldap.search.attr.group.name</name><value>cn</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.replication.min</name><value>1</value><source>hdfs-default.xml</source></property>
-<property><name>s3native.bytes-per-checksum</name><value>512</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.job.reduce.rampup.limit</name><value>1.0</value><source>mapred-site.xml</source></property>
-<property><name>tfile.fs.output.buffer.size</name><value>262144</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.local-dirs</name><value>/Users/sseth/work2/hortonworks/mrx/run/local/0</value><source>yarn-site.xml</source></property>
-<property><name>fs.AbstractFileSystem.hdfs.impl</name><value>org.apache.hadoop.fs.Hdfs</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.map.output.collector.class</name><value>org.apache.hadoop.mapred.MapTask$MapOutputBuffer</value><source>mapred-default.xml</source></property>
-<property><name>dfs.namenode.safemode.min.datanodes</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.security.uid.cache.secs</name><value>14400</value><source>core-default.xml</source></property>
-<property><name>dfs.client.write.exclude.nodes.cache.expiry.interval.millis</name><value>600000</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.client.https.need-auth</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.client.https.keystore.resource</name><value>ssl-client.xml</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.max.objects</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.ssl.client.conf</name><value>ssl-client.xml</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.safemode.threshold-pct</name><value>0.999f</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.blocksize</name><value>268435456</value><source>hdfs-site.xml</source></property>
-<property><name>yarn.resourcemanager.scheduler.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value><source>yarn-site.xml</source></property>
-<property><name>mapreduce.job.reduce.slowstart.completedmaps</name><value>1.0</value><source>mapred-site.xml</source></property>
-<property><name>mapreduce.job.end-notification.retry.attempts</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.map.memory.mb</name><value>1024</value><source>mapred-site.xml</source></property>
-<property><name>io.native.lib.available</name><value>true</value><source>core-default.xml</source></property>
-<property><name>dfs.client-write-packet-size</name><value>65536</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.name.dir</name><value>/hadoopWorkDirMrx/var/hdfs/name</value><source>hdfs-site.xml</source></property>
-<property><name>mapreduce.client.progressmonitor.pollinterval</name><value>1000</value><source>mapred-default.xml</source></property>
-<property><name>dfs.ha.log-roll.period</name><value>120</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.input.buffer.percent</name><value>0.0</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.map.output.compress.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.map.skip.proc.count.autoincr</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>dfs.client.failover.sleep.base.millis</name><value>500</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.directoryscan.threads</name><value>1</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.fsdataset.volume.choosing.balanced-space-preference-percent</name><value>0.75f</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.cluster.local.dir</name><value>${hadoop.tmp.dir}/mapred/local</value><source>mapred-default.xml</source></property>
-<property><name>dfs.permissions.enabled</name><value>true</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.support.append</name><value>true</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.parallelcopies</name><value>5</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.env-whitelist</name><value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,HADOOP_YARN_HOME</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.maxtaskfailures.per.tracker</name><value>3</value><source>mapred-default.xml</source></property>
-<property><name>ipc.client.connection.maxidletime</name><value>10000</value><source>core-default.xml</source></property>
-<property><name>mapreduce.shuffle.ssl.enabled</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>dfs.namenode.invalidate.work.pct.per.iteration</name><value>0.32f</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.blockreport.intervalMsec</name><value>21600000</value><source>hdfs-default.xml</source></property>
-<property><name>fs.s3.sleepTimeSeconds</name><value>10</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.replication.considerLoad</name><value>true</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.scheduler.maximum-allocation-vcores</name><value>32</value><source>yarn-default.xml</source></property>
-<property><name>dfs.client.block.write.retries</name><value>3</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.ssl.server.conf</name><value>ssl-server.xml</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.name.dir.restore</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.hdfs-blocks-metadata.enabled</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>ha.zookeeper.parent-znode</name><value>/hadoop-ha</value><source>core-default.xml</source></property>
-<property><name>io.seqfile.lazydecompress</name><value>true</value><source>core-default.xml</source></property>
-<property><name>dfs.https.enable</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.merge.inmem.threshold</name><value>1000</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.input.fileinputformat.split.minsize</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>dfs.replication</name><value>3</value><source>hdfs-site.xml</source></property>
-<property><name>ipc.client.tcpnodelay</name><value>false</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.accesstime.precision</name><value>3600000</value><source>hdfs-default.xml</source></property>
-<property><name>s3.stream-buffer-size</name><value>4096</value><source>core-default.xml</source></property>
-<property><name>mapreduce.task.io.sort.mb</name><value>100</value><source>mapred-default.xml</source></property>
-<property><name>io.file.buffer.size</name><value>4096</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.audit.loggers</name><value>default</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.jar</name><value>/user/sseth/.staging/job_1365641922379_0001/job.jar</value><source>programatically</source></property>
-<property><name>dfs.namenode.checkpoint.txns</name><value>1000000</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.admin-env</name><value>MALLOC_ARENA_MAX=$MALLOC_ARENA_MAX</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.split.metainfo.maxsize</name><value>10000000</value><source>mapred-default.xml</source></property>
-<property><name>rpc.engine.org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB</name><value>org.apache.hadoop.ipc.ProtobufRpcEngine</value><source>programatically</source></property>
-<property><name>yarn.app.mapreduce.am.scheduler.heartbeat.interval-ms</name><value>1000</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.reduce.maxattempts</name><value>4</value><source>mapred-default.xml</source></property>
-<property><name>dfs.ha.tail-edits.period</name><value>60</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.security.authentication</name><value>simple</value><source>core-default.xml</source></property>
-<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.avoid.read.stale.datanode</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.job.task.listener.thread-count</name><value>30</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.job.reduces</name><value>3</value><source>from command line</source></property>
-<property><name>mapreduce.map.sort.spill.percent</name><value>0.80</value><source>mapred-default.xml</source></property>
-<property><name>dfs.client.file-block-storage-locations.timeout</name><value>60</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.drop.cache.behind.writes</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.end-notification.retry.interval</name><value>1000</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.job.maps</name><value>6</value><source>programatically</source></property>
-<property><name>mapreduce.job.speculative.slownodethreshold</name><value>1.0</value><source>mapred-default.xml</source></property>
-<property><name>dfs.block.access.token.enable</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>tfile.fs.input.buffer.size</name><value>262144</value><source>core-default.xml</source></property>
-<property><name>mapreduce.map.speculative</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>dfs.journalnode.http-address</name><value>0.0.0.0:8480</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.acl-view-job</name><value> </value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.retry-delay.max.ms</name><value>60000</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.kerberos.min.seconds.before.relogin</name><value>60</value><source>core-default.xml</source></property>
-<property><name>yarn.ipc.serializer.type</name><value>protocolbuffers</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.end-notification.max.retry.interval</name><value>5000</value><source>mapred-default.xml</source></property>
-<property><name>ftp.blocksize</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>dfs.datanode.data.dir</name><value>/hadoopWorkDirMrx/var/hdfs/data</value><source>hdfs-site.xml</source></property>
-<property><name>ha.failover-controller.cli-check.rpc-timeout.ms</name><value>20000</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.max.extra.edits.segments.retained</name><value>10000</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.replication.interval</name><value>3</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.resourcemanager.connect.wait.secs</name><value>900</value><source>yarn-default.xml</source></property>
-<property><name>dfs.namenode.https-address</name><value>0.0.0.0:50470</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.task.skip.start.attempts</name><value>2</value><source>mapred-default.xml</source></property>
-<property><name>dfs.ha.automatic-failover.enabled</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>ipc.client.kill.max</name><value>10</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.linux-container-executor.cgroups.mount</name><value>false</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.jobhistory.keytab</name><value>/etc/security/keytab/jhs.service.keytab</value><source>mapred-default.xml</source></property>
-<property><name>dfs.image.transfer.timeout</name><value>600000</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name><value>/hadoop-yarn</value><source>yarn-default.xml</source></property>
-<property><name>dfs.client.failover.sleep.max.millis</name><value>15000</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.end-notification.max.attempts</name><value>5</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.task.tmp.dir</name><value>./tmp</value><source>mapred-default.xml</source></property>
-<property><name>dfs.default.chunk.view.size</name><value>32768</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.memory.mb</name><value>1024</value><source>mapred-site.xml</source></property>
-<property><name>mapreduce.reduce.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value><source>because mapreduce.job.reduce.class is deprecated</source></property>
-<property><name>hadoop.http.filter.initializers</name><value>org.apache.hadoop.yarn.server.webproxy.amfilter.AmFilterInitializer</value><source>programatically</source></property>
-<property><name>dfs.datanode.failed.volumes.tolerated</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.http.authentication.type</name><value>simple</value><source>core-default.xml</source></property>
-<property><name>dfs.datanode.data.dir.perm</name><value>700</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.client.thread-count</name><value>50</value><source>yarn-default.xml</source></property>
-<property><name>ipc.server.listen.queue.size</name><value>128</value><source>core-default.xml</source></property>
-<property><name>mapreduce.reduce.skip.maxgroups</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>file.stream-buffer-size</name><value>4096</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.fs-limits.max-directory-items</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.store.class</name><value>org.apache.hadoop.yarn.server.resourcemanager.recovery.FileSystemRMStateStore</value><source>yarn-default.xml</source></property>
-<property><name>io.mapfile.bloom.size</name><value>1048576</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.container-executor.class</name><value>org.apache.hadoop.yarn.server.nodemanager.DefaultContainerExecutor</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.map.maxattempts</name><value>4</value><source>mapred-default.xml</source></property>
-<property><name>yarn.log-aggregation.retain-seconds</name><value>-1</value><source>yarn-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.job.committer.cancel-timeout</name><value>60000</value><source>mapred-default.xml</source></property>
-<property><name>ftp.replication</name><value>3</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.health-checker.script.timeout-ms</name><value>1200000</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.jobhistory.intermediate-done-dir</name><value>/mapred/history/done_intermediate</value><source>mapred-site.xml</source></property>
-<property><name>mapreduce.jobhistory.address</name><value>0.0.0.0:10020</value><source>mapred-default.xml</source></property>
-<property><name>dfs.datanode.dns.nameserver</name><value>default</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.application.classpath</name><value>$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/*,$HADOOP_MAPRED_HOME/share/hadoop/mapreduce/lib/*</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.log.retain-seconds</name><value>10800</value><source>yarn-site.xml</source></property>
-<property><name>yarn.nodemanager.local-cache.max-files-per-directory</name><value>8192</value><source>yarn-default.xml</source></property>
-<property><name>mapred.child.java.opts</name><value>-server -Xmx200m</value><source>mapred-site.xml</source></property>
-<property><name>dfs.replication.max</name><value>512</value><source>hdfs-default.xml</source></property>
-<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value><source>mapred-default.xml</source></property>
-<property><name>dfs.stream-buffer-size</name><value>4096</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.backup.address</name><value>0.0.0.0:50100</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.util.hash.type</name><value>murmur</value><source>core-default.xml</source></property>
-<property><name>dfs.block.access.key.update.interval</name><value>600</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.use.datanode.hostname</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.dns.interface</name><value>default</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.reduce.skip.proc.count.autoincr</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.job.output.key.class</name><value>org.apache.hadoop.io.Text</value><source>programatically</source></property>
-<property><name>dfs.namenode.backup.http-address</name><value>0.0.0.0:50105</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.container-monitor.interval-ms</name><value>3000</value><source>yarn-default.xml</source></property>
-<property><name>mapred.reducer.new-api</name><value>true</value><source>programatically</source></property>
-<property><name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name><value>0.25</value><source>yarn-default.xml</source></property>
-<property><name>ha.zookeeper.acl</name><value>world:anyone:rwcda</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.sleep-delay-before-sigkill.ms</name><value>250</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.dir</name><value>/user/sseth/.staging/job_1365641922379_0001</value><source>programatically</source></property>
-<property><name>io.map.index.skip</name><value>0</value><source>core-default.xml</source></property>
-<property><name>net.topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.logging.level</name><value>info</value><source>hdfs-default.xml</source></property>
-<property><name>fs.s3.maxRetries</name><value>4</value><source>core-default.xml</source></property>
-<property><name>ha.failover-controller.new-active.rpc-timeout.ms</name><value>60000</value><source>core-default.xml</source></property>
-<property><name>s3native.client-write-packet-size</name><value>65536</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name><value>1000</value><source>yarn-default.xml</source></property>
-<property><name>hadoop.http.staticuser.user</name><value>dr.who</value><source>core-default.xml</source></property>
-<property><name>mapreduce.reduce.speculative</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.client.output.filter</name><value>FAILED</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.ifile.readahead.bytes</name><value>4194304</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.task.userlog.limit.kb</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.http.authentication.simple.anonymous.allowed</name><value>true</value><source>core-default.xml</source></property>
-<property><name>hadoop.fuse.timer.period</name><value>5</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.num.extra.edits.retained</name><value>1000000</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.classloader.system.classes</name><value>java.,javax.,org.apache.commons.logging.,org.apache.log4j.,org.apache.hadoop.</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.submithostname</name><value>Sids-MBP.local</value><source>programatically</source></property>
-<property><name>yarn.nodemanager.resourcemanager.connect.retry_interval.secs</name><value>30</value><source>yarn-default.xml</source></property>
-<property><name>dfs.namenode.handler.count</name><value>10</value><source>hdfs-default.xml</source></property>
-<property><name>fs.automatic.close</name><value>true</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.submithostaddress</name><value>10.0.0.11</value><source>programatically</source></property>
-<property><name>mapred.map.tasks</name><value>6</value><source>because mapreduce.job.maps is deprecated</source></property>
-<property><name>dfs.datanode.directoryscan.interval</name><value>21600</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.address</name><value>${yarn.resourcemanager.hostname}:8032</value><source>yarn-default.xml</source></property>
-<property><name>dfs.client.file-block-storage-locations.num-threads</name><value>10</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.health-checker.interval-ms</name><value>600000</value><source>yarn-default.xml</source></property>
-<property><name>yarn.resourcemanager.container-tokens.master-key-rolling-interval-secs</name><value>86400</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.reduce.markreset.buffer.percent</name><value>0.0</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.map.log.level</name><value>INFO</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.localizer.address</name><value>${yarn.nodemanager.hostname}:8040</value><source>yarn-default.xml</source></property>
-<property><name>dfs.bytes-per-checksum</name><value>512</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.checkpoint.max-retries</name><value>3</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.avoid.write.stale.datanode</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>ftp.stream-buffer-size</name><value>4096</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.keytab</name><value>/etc/krb5.keytab</value><source>yarn-default.xml</source></property>
-<property><name>ha.health-monitor.rpc-timeout.ms</name><value>45000</value><source>core-default.xml</source></property>
-<property><name>mapreduce.output.fileoutputformat.outputdir</name><value>hdfs://localhost:8020/user/sseth/out101</value><source>programatically</source></property>
-<property><name>hadoop.security.group.mapping.ldap.search.attr.member</name><value>member</value><source>core-default.xml</source></property>
-<property><name>dfs.blockreport.initialDelay</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.classloader</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nm.liveness-monitor.expiry-interval-ms</name><value>600000</value><source>yarn-default.xml</source></property>
-<property><name>io.compression.codec.bzip2.library</name><value>system-native</value><source>core-default.xml</source></property>
-<property><name>hadoop.http.authentication.token.validity</name><value>36000</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.resource.cpu-cores</name><value>8</value><source>yarn-default.xml</source></property>
-<property><name>yarn.nodemanager.vcores-pcores-ratio</name><value>2</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.input.fileinputformat.numinputfiles</name><value>6</value><source>programatically</source></property>
-<property><name>dfs.namenode.delegation.token.max-lifetime</name><value>604800000</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.hdfs-servers</name><value>${fs.defaultFS}</value><source>yarn-default.xml</source></property>
-<property><name>s3native.replication</name><value>3</value><source>core-default.xml</source></property>
-<property><name>dfs.heartbeat.interval</name><value>3</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.localizer.client.thread-count</name><value>5</value><source>yarn-default.xml</source></property>
-<property><name>yarn.resourcemanager.container.liveness-monitor.interval-ms</name><value>600000</value><source>yarn-default.xml</source></property>
-<property><name>dfs.ha.fencing.ssh.connect-timeout</name><value>30000</value><source>core-default.xml</source></property>
-<property><name>yarn.am.liveness-monitor.expiry-interval-ms</name><value>600000</value><source>yarn-default.xml</source></property>
-<property><name>net.topology.impl</name><value>org.apache.hadoop.net.NetworkTopology</value><source>core-default.xml</source></property>
-<property><name>mapreduce.task.profile</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.linux-container-executor.resources-handler.class</name><value>org.apache.hadoop.yarn.server.nodemanager.util.DefaultLCEResourcesHandler</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.jobhistory.webapp.address</name><value>0.0.0.0:19888</value><source>mapred-default.xml</source></property>
-<property><name>yarn.ipc.rpc.class</name><value>org.apache.hadoop.yarn.ipc.HadoopYarnProtoRPC</value><source>yarn-default.xml</source></property>
-<property><name>ha.failover-controller.graceful-fence.rpc-timeout.ms</name><value>5000</value><source>core-default.xml</source></property>
-<property><name>mapreduce.map.class</name><value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value><source>because mapreduce.job.map.class is deprecated</source></property>
-<property><name>mapred.input.dir</name><value>hdfs://localhost:8020/user/sseth/input</value><source>because mapreduce.input.fileinputformat.inputdir is deprecated</source></property>
-<property><name>mapreduce.combine.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value><source>because mapreduce.job.combine.class is deprecated</source></property>
-<property><name>mapreduce.job.name</name><value>word count</value><source>programatically</source></property>
-<property><name>yarn.resourcemanager.application-tokens.master-key-rolling-interval-secs</name><value>86400</value><source>yarn-default.xml</source></property>
-<property><name>yarn.resourcemanager.am.max-attempts</name><value>1</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.ubertask.maxmaps</name><value>9</value><source>mapred-default.xml</source></property>
-<property><name>yarn.scheduler.maximum-allocation-mb</name><value>6144</value><source>yarn-site.xml</source></property>
-<property><name>dfs.namenode.secondary.http-address</name><value>0.0.0.0:50090</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.task.timeout</name><value>600000</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.framework.name</name><value>yarn</value><source>mapred-site.xml</source></property>
-<property><name>ipc.client.idlethreshold</name><value>4000</value><source>core-default.xml</source></property>
-<property><name>ipc.server.tcpnodelay</name><value>false</value><source>core-default.xml</source></property>
-<property><name>ftp.bytes-per-checksum</name><value>512</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.stale.datanode.interval</name><value>30000</value><source>hdfs-default.xml</source></property>
-<property><name>mapred.output.dir</name><value>hdfs://localhost:8020/user/sseth/out101</value><source>because mapreduce.output.fileoutputformat.outputdir is deprecated</source></property>
-<property><name>yarn.resourcemanager.hostname</name><value>0.0.0.0</value><source>yarn-default.xml</source></property>
-<property><name>s3.bytes-per-checksum</name><value>512</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.speculative.slowtaskthreshold</name><value>1.0</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.localizer.cache.target-size-mb</name><value>10240</value><source>yarn-default.xml</source></property>
-<property><name>yarn.nodemanager.remote-app-log-dir</name><value>/mapred/logs</value><source>yarn-site.xml</source></property>
-<property><name>fs.s3.block.size</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>dfs.client.failover.connection.retries</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.queuename</name><value>default</value><source>mapred-default.xml</source></property>
-<property><name>yarn.scheduler.minimum-allocation-mb</name><value>1024</value><source>yarn-site.xml</source></property>
-<property><name>hadoop.rpc.protection</name><value>authentication</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.client-am.ipc.max-retries</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>dfs.secondary.namenode.kerberos.internal.spnego.principal</name><value>${dfs.web.authentication.kerberos.principal}</value><source>hdfs-default.xml</source></property>
-<property><name>ftp.client-write-packet-size</name><value>65536</value><source>core-default.xml</source></property>
-<property><name>mapred.output.key.class</name><value>org.apache.hadoop.io.Text</value><source>because mapreduce.job.output.key.class is deprecated</source></property>
-<property><name>yarn.nodemanager.address</name><value>${yarn.nodemanager.hostname}:0</value><source>yarn-default.xml</source></property>
-<property><name>fs.defaultFS</name><value>hdfs://localhost:8020</value><source>core-site.xml</source></property>
-<property><name>mapreduce.task.merge.progress.records</name><value>10000</value><source>mapred-default.xml</source></property>
-<property><name>yarn.resourcemanager.scheduler.client.thread-count</name><value>50</value><source>yarn-default.xml</source></property>
-<property><name>file.client-write-packet-size</name><value>65536</value><source>core-default.xml</source></property>
-<property><name>mapreduce.reduce.cpu.vcores</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.delete.thread-count</name><value>4</value><source>yarn-default.xml</source></property>
-<property><name>yarn.resourcemanager.scheduler.address</name><value>${yarn.resourcemanager.hostname}:8030</value><source>yarn-default.xml</source></property>
-<property><name>fs.trash.checkpoint.interval</name><value>0</value><source>core-default.xml</source></property>
-<property><name>hadoop.http.authentication.signature.secret.file</name><value>${user.home}/hadoop-http-auth-signature-secret</value><source>core-default.xml</source></property>
-<property><name>s3native.stream-buffer-size</name><value>4096</value><source>core-default.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.read.timeout</name><value>180000</value><source>mapred-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.command-opts</name><value>-Xmx1024m</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.admin.user.env</name><value>LD_LIBRARY_PATH=$HADOOP_COMMON_HOME/lib/native</value><source>mapred-default.xml</source></property>
-<property><name>yarn.resourcemanager.fs.rm-state-store.uri</name><value>${hadoop.tmp.dir}/yarn/system/rmstore</value><source>yarn-default.xml</source></property>
-<property><name>dfs.namenode.checkpoint.edits.dir</name><value>${dfs.namenode.checkpoint.dir}</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.local.clientfactory.class.name</name><value>org.apache.hadoop.mapred.LocalClientFactory</value><source>mapred-default.xml</source></property>
-<property><name>fs.permissions.umask-mode</name><value>022</value><source>core-site.xml</source></property>
-<property><name>hadoop.common.configuration.version</name><value>3.0.0</value><source>core-default.xml</source></property>
-<property><name>mapreduce.output.fileoutputformat.compress.type</name><value>RECORD</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.security.group.mapping.ldap.ssl</name><value>false</value><source>core-default.xml</source></property>
-<property><name>mapreduce.ifile.readahead</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.aux-service.mapreduce.shuffle.class</name><value>org.apache.hadoop.mapred.ShuffleHandler</value><source>yarn-site.xml</source></property>
-<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization,org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization,org.apache.hadoop.io.serializer.avro.AvroReflectSerialization</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name><value>org.apache.hadoop.mapred.ShuffleHandler</value><source>yarn-default.xml</source></property>
-<property><name>fs.df.interval</name><value>60000</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.combine.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value><source>programatically</source></property>
-<property><name>mapreduce.reduce.shuffle.input.buffer.percent</name><value>0.70</value><source>mapred-default.xml</source></property>
-<property><name>io.seqfile.compress.blocksize</name><value>1000000</value><source>core-default.xml</source></property>
-<property><name>ipc.client.connect.max.retries</name><value>10</value><source>core-default.xml</source></property>
-<property><name>hadoop.security.groups.cache.secs</name><value>300</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.delegation.key.update-interval</name><value>86400000</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.process-kill-wait.ms</name><value>2000</value><source>yarn-default.xml</source></property>
-<property><name>yarn.nodemanager.vmem-check-enabled</name><value>true</value><source>yarn-default.xml</source></property>
-<property><name>yarn.application.classpath</name><value>$HADOOP_CONF_DIR,$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*</value><source>yarn-default.xml</source></property>
-<property><name>yarn.app.mapreduce.client.max-retries</name><value>3</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.log-aggregation.compression-type</name><value>none</value><source>yarn-site.xml</source></property>
-<property><name>hadoop.security.group.mapping.ldap.search.filter.user</name><value>(&amp;(objectClass=user)(sAMAccountName={0}))</value><source>core-default.xml</source></property>
-<property><name>dfs.image.compress</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.localizer.cache.cleanup.interval-ms</name><value>600000</value><source>yarn-default.xml</source></property>
-<property><name>mapred.output.value.class</name><value>org.apache.hadoop.io.IntWritable</value><source>because mapreduce.job.output.value.class is deprecated</source></property>
-<property><name>mapred.mapper.new-api</name><value>true</value><source>programatically</source></property>
-<property><name>dfs.namenode.kerberos.internal.spnego.principal</name><value>${dfs.web.authentication.kerberos.principal}</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.nodemanager.log-dirs</name><value>/Users/sseth/work2/hortonworks/mrx/run/logs</value><source>yarn-site.xml</source></property>
-<property><name>fs.s3n.block.size</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>fs.ftp.host</name><value>0.0.0.0</value><source>core-default.xml</source></property>
-<property><name>hadoop.security.group.mapping</name><value>org.apache.hadoop.security.JniBasedUnixGroupsMappingWithFallback</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.resource.cpu-vcores</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>dfs.datanode.address</name><value>0.0.0.0:50010</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.fsdataset.volume.choosing.balanced-space-threshold</name><value>10737418240</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.map.skip.maxrecords</name><value>0</value><source>mapred-default.xml</source></property>
-<property><name>dfs.datanode.https.address</name><value>0.0.0.0:50475</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.scheduler.minimum-allocation-vcores</name><value>1</value><source>yarn-default.xml</source></property>
-<property><name>file.replication</name><value>1</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.resource-tracker.address</name><value>${yarn.resourcemanager.hostname}:8031</value><source>yarn-default.xml</source></property>
-<property><name>dfs.datanode.drop.cache.behind.reads</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.fuse.connection.timeout</name><value>300</value><source>hdfs-default.xml</source></property>
-<property><name>mapred.jar</name><value>/user/sseth/.staging/job_1365641922379_0001/job.jar</value><source>because mapreduce.job.jar is deprecated</source></property>
-<property><name>hadoop.work.around.non.threadsafe.getpwuid</name><value>false</value><source>core-default.xml</source></property>
-<property><name>mapreduce.client.genericoptionsparser.used</name><value>true</value><source>programatically</source></property>
-<property><name>mapreduce.output.fileoutputformat.compress</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>hadoop.tmp.dir</name><value>/hadoopWorkDirMrx/tmp/hadoop-${user.name}</value><source>core-site.xml</source></property>
-<property><name>dfs.client.block.write.replace-datanode-on-failure.policy</name><value>DEFAULT</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.kerberos.kinit.command</name><value>kinit</value><source>core-default.xml</source></property>
-<property><name>mapreduce.job.committer.setup.cleanup.needed</name><value>true</value><source>mapred-default.xml</source></property>
-<property><name>dfs.webhdfs.enabled</name><value>false</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.du.reserved</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.task.profile.reduces</name><value>0-2</value><source>mapred-default.xml</source></property>
-<property><name>file.bytes-per-checksum</name><value>512</value><source>core-default.xml</source></property>
-<property><name>mapreduce.input.fileinputformat.inputdir</name><value>hdfs://localhost:8020/user/sseth/input</value><source>programatically</source></property>
-<property><name>dfs.client.block.write.replace-datanode-on-failure.enable</name><value>true</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.job.output.value.class</name><value>org.apache.hadoop.io.IntWritable</value><source>programatically</source></property>
-<property><name>yarn.app.mapreduce.am.job.committer.commit-window</name><value>10000</value><source>mapred-default.xml</source></property>
-<property><name>net.topology.script.number.args</name><value>100</value><source>core-default.xml</source></property>
-<property><name>mapreduce.task.profile.maps</name><value>0-2</value><source>mapred-default.xml</source></property>
-<property><name>dfs.namenode.decommission.interval</name><value>30</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.image.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.webapp.address</name><value>${yarn.resourcemanager.hostname}:8088</value><source>yarn-default.xml</source></property>
-<property><name>dfs.namenode.support.allow.format</name><value>true</value><source>hdfs-default.xml</source></property>
-<property><name>hadoop.ssl.hostname.verifier</name><value>DEFAULT</value><source>core-default.xml</source></property>
-<property><name>yarn.nodemanager.vmem-pmem-ratio</name><value>2.1</value><source>yarn-default.xml</source></property>
-<property><name>yarn.nodemanager.hostname</name><value>0.0.0.0</value><source>yarn-default.xml</source></property>
-<property><name>ipc.client.connect.timeout</name><value>20000</value><source>core-default.xml</source></property>
-<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value><source>core-default.xml</source></property>
-<property><name>mapreduce.jobhistory.principal</name><value>jhs/_HOST@REALM.TLD</value><source>mapred-default.xml</source></property>
-<property><name>dfs.permissions.superusergroup</name><value>supergroup</value><source>hdfs-default.xml</source></property>
-<property><name>mapreduce.shuffle.ssl.file.buffer.size</name><value>65536</value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.cluster.acls.enabled</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>yarn.nodemanager.remote-app-log-dir-suffix</name><value>logs</value><source>yarn-site.xml</source></property>
-<property><name>ha.failover-controller.graceful-fence.connection.retries</name><value>1</value><source>core-default.xml</source></property>
-<property><name>ha.health-monitor.connect-retry-interval.ms</name><value>1000</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.checkpoint.check.period</name><value>60</value><source>hdfs-default.xml</source></property>
-<property><name>io.seqfile.local.dir</name><value>${hadoop.tmp.dir}/io/local</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.resource.mb</name><value>1024</value><source>mapred-site.xml</source></property>
-<property><name>mapreduce.reduce.shuffle.merge.percent</name><value>0.66</value><source>mapred-default.xml</source></property>
-<property><name>tfile.io.chunk.size</name><value>1048576</value><source>core-default.xml</source></property>
-<property><name>file.blocksize</name><value>67108864</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.nm.liveness-monitor.interval-ms</name><value>1000</value><source>yarn-default.xml</source></property>
-<property><name>yarn.nodemanager.webapp.address</name><value>${yarn.nodemanager.hostname}:8042</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.job.acl-modify-job</name><value> </value><source>mapred-default.xml</source></property>
-<property><name>mapreduce.am.max-attempts</name><value>1</value><source>mapred-default.xml</source></property>
-<property><name>io.skip.checksum.errors</name><value>false</value><source>core-default.xml</source></property>
-<property><name>yarn.app.mapreduce.am.staging-dir</name><value>/user</value><source>mapred-site.xml</source></property>
-<property><name>dfs.namenode.edits.journal-plugin.qjournal</name><value>org.apache.hadoop.hdfs.qjournal.client.QuorumJournalManager</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.datanode.handler.count</name><value>10</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.decommission.nodes.per.interval</name><value>5</value><source>hdfs-default.xml</source></property>
-<property><name>fs.ftp.host.port</name><value>21</value><source>core-default.xml</source></property>
-<property><name>dfs.namenode.checkpoint.period</name><value>3600</value><source>hdfs-default.xml</source></property>
-<property><name>dfs.namenode.fs-limits.max-component-length</name><value>0</value><source>hdfs-default.xml</source></property>
-<property><name>yarn.resourcemanager.admin.client.thread-count</name><value>1</value><source>yarn-default.xml</source></property>
-<property><name>fs.AbstractFileSystem.viewfs.impl</name><value>org.apache.hadoop.fs.viewfs.ViewFs</value><source>core-default.xml</source></property>
-<property><name>yarn.resourcemanager.resource-tracker.client.thread-count</name><value>50</value><source>yarn-default.xml</source></property>
-<property><name>mapreduce.map.output.compress</name><value>false</value><source>mapred-default.xml</source></property>
-<property><name>dfs.datanode.ipc.address</name><value>0.0.0.0:50020</value><source>hdfs-default.xml</source></property>
-<property><name>mapred.working.dir</name><value>hdfs://localhost:8020/user/sseth</value><source>because mapreduce.job.working.dir is deprecated</source></property>
-<property><name>yarn.nodemanager.delete.debug-delay-sec</name><value>3600</value><source>yarn-site.xml</source></property>
-<property><name>hadoop.ssl.require.client.cert</name><value>false</value><source>core-default.xml</source></property>
-<property><name>dfs.datanode.max.transfer.threads</name><value>4096</value><source>hdfs-default.xml</source></property>
-</configuration>

http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/5d09d046/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
index d195e2d..c980d9a 100644
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
+++ b/tez-dag/src/main/java/org/apache/tez/dag/app/DAGAppMaster.java
@@ -31,9 +31,6 @@ import java.util.concurrent.ConcurrentMap;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
 
-import org.apache.commons.cli.CommandLine;
-import org.apache.commons.cli.GnuParser;
-import org.apache.commons.cli.Options;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
@@ -1004,42 +1001,24 @@ public class DAGAppMaster extends CompositeService {
           containerId.getApplicationAttemptId();
       long appSubmitTime = Long.parseLong(appSubmitTimeStr);
 
-      Options opts = getCliOptions();
-      CommandLine cliParser = new GnuParser().parse(opts, args);
-
-      // Default to running mr if nothing specified.
-      // TODO change this once the client is ready.
-      String type;
       TezConfiguration conf = new TezConfiguration(new YarnConfiguration());
 
       DAGPlan dagPlan = null;
-      if (cliParser.hasOption(OPT_PREDEFINED)) {
-        LOG.info("Running with PreDefined configuration");
-        type = cliParser.getOptionValue(OPT_PREDEFINED, "mr");
-        LOG.info("Running job type: " + type);
-
-        if (type.equals("mr")) {
-          dagPlan = MRRExampleHelper.createDAGConfigurationForMR();
-        } else if (type.equals("mrr")) {
-          dagPlan = MRRExampleHelper.createDAGConfigurationForMRR();
+
+      // Read the protobuf DAG
+      DAGPlan.Builder dagPlanBuilder = DAGPlan.newBuilder();
+      FileInputStream dagPBBinaryStream = null;
+      try {
+        dagPBBinaryStream = new FileInputStream(
+            TezConfiguration.DAG_AM_PLAN_PB_BINARY);
+        dagPlanBuilder.mergeFrom(dagPBBinaryStream);
+      } finally {
+        if (dagPBBinaryStream != null) {
+          dagPBBinaryStream.close();
         }
       }
-      else {
-        // Read the protobuf DAG
-        DAGPlan.Builder dagPlanBuilder = DAGPlan.newBuilder();
-        FileInputStream dagPBBinaryStream = null;
-        try {
-          dagPBBinaryStream = new FileInputStream(TezConfiguration.DAG_AM_PLAN_PB_BINARY);
-          dagPlanBuilder.mergeFrom(dagPBBinaryStream);
-        }
-        finally {
-          if(dagPBBinaryStream != null){
-            dagPBBinaryStream.close();
-          }
-        }
 
-        dagPlan = dagPlanBuilder.build();
-      }
+      dagPlan = dagPlanBuilder.build();
 
       if (LOG.isDebugEnabled()) {
         LOG.debug("Running a DAG with "
@@ -1078,17 +1057,6 @@ public class DAGAppMaster extends CompositeService {
     }
   }
 
-  private static String OPT_PREDEFINED = "predefined";
-
-  private static Options getCliOptions() {
-    Options opts = new Options();
-    opts.addOption(OPT_PREDEFINED, true,
-        "Whether to run the predefined MR/MRR jobs");
-    return opts;
-  }
-
-
-
   // The shutdown hook that runs when a signal is received AND during normal
   // close of the JVM.
   static class DAGAppMasterShutdownHook implements Runnable {

http://git-wip-us.apache.org/repos/asf/incubator-tez/blob/5d09d046/tez-dag/src/main/java/org/apache/tez/dag/app/MRRExampleHelper.java
----------------------------------------------------------------------
diff --git a/tez-dag/src/main/java/org/apache/tez/dag/app/MRRExampleHelper.java b/tez-dag/src/main/java/org/apache/tez/dag/app/MRRExampleHelper.java
deleted file mode 100644
index 5250704..0000000
--- a/tez-dag/src/main/java/org/apache/tez/dag/app/MRRExampleHelper.java
+++ /dev/null
@@ -1,212 +0,0 @@
-package org.apache.tez.dag.app;
-
-import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapreduce.v2.util.MRApps;
-import org.apache.hadoop.security.UserGroupInformation;
-import org.apache.hadoop.yarn.api.records.LocalResource;
-import org.apache.hadoop.yarn.api.records.LocalResourceType;
-import org.apache.hadoop.yarn.api.records.LocalResourceVisibility;
-import org.apache.hadoop.yarn.api.records.Resource;
-import org.apache.tez.dag.api.records.DAGProtos.DAGPlan;
-import org.apache.tez.dag.api.Edge;
-import org.apache.tez.dag.api.EdgeProperty;
-import org.apache.tez.dag.api.EdgeProperty.ConnectionPattern;
-import org.apache.tez.dag.api.EdgeProperty.SourceType;
-import org.apache.tez.dag.api.InputDescriptor;
-import org.apache.tez.dag.api.OutputDescriptor;
-import org.apache.tez.dag.api.ProcessorDescriptor;
-import org.apache.tez.dag.api.TezConfiguration;
-import org.apache.tez.dag.api.Vertex;
-import org.apache.tez.dag.app.rm.container.AMContainerHelpers;
-import org.apache.tez.engine.lib.input.ShuffledMergedInput;
-import org.apache.tez.engine.lib.output.OnFileSortedOutput;
-import org.apache.tez.mapreduce.hadoop.MRJobConfig;
-
-public class MRRExampleHelper {
-
-  private static final Log LOG = LogFactory.getLog(MRRExampleHelper.class);
-  
-  //TODO remove once client is in place
- private static Path getMRBaseDir() throws IOException {
-   Path basePath = MRApps.getStagingAreaDir(new Configuration(),
-       UserGroupInformation.getCurrentUser().getShortUserName());
-   return new Path(basePath, "dagTest");
- }
-
- private static Path getMRRBaseDir() throws IOException {
-   Path basePath = MRApps.getStagingAreaDir(new Configuration(),
-       UserGroupInformation.getCurrentUser().getShortUserName());
-   return new Path(basePath, "mrrTest");
- }
-
- private static String getConfFileName(String vertexName) {
-   return MRJobConfig.JOB_CONF_FILE + "_" + vertexName;
- }
-
- // TODO remove once client is in place
- private static Map<String, LocalResource> createLocalResources(
-     Path remoteBaseDir, String[] resourceNames) throws IOException {
-   Configuration conf = new Configuration();
-   FileSystem fs = FileSystem.get(conf);
-
-   Map<String, LocalResource> localResources = new TreeMap<String, LocalResource>();
-
-   for (String resourceName : resourceNames) {
-     Path remoteFile = new Path(remoteBaseDir, resourceName);
-     localResources.put(resourceName, AMContainerHelpers.createLocalResource(
-         fs, remoteFile, LocalResourceType.FILE,
-         LocalResourceVisibility.APPLICATION));
-     LOG.info("Localizing file " + resourceName + " from location "
-         + remoteFile.toString());
-   }
-   return localResources;
- }
-
-
- private static String[] getMRLocalRsrcList() {
-   String[] resourceNames = new String[] { MRJobConfig.JOB_JAR,
-       MRJobConfig.JOB_SPLIT, MRJobConfig.JOB_SPLIT_METAINFO,
-       MRJobConfig.JOB_CONF_FILE };
-   return resourceNames;
- }
-
- private static String[] getMRRLocalRsrcList() {
-   String[] resourceNames = new String[] { MRJobConfig.JOB_JAR,
-       MRJobConfig.JOB_SPLIT, MRJobConfig.JOB_SPLIT_METAINFO,
-       MRJobConfig.JOB_CONF_FILE, getConfFileName("reduce1"),
-       getConfFileName("reduce2") };
-   return resourceNames;
- }
-
- // TODO: these preconfigured jobs seem to require User and perhaps some other work.
- //       -> not tested with new DagPB system.
- 
- static DAGPlan createDAGConfigurationForMRR() throws IOException {
-   org.apache.tez.dag.api.DAG dag = new org.apache.tez.dag.api.DAG("examplemrrjob");
-    Vertex mapVertex = new Vertex("map", new ProcessorDescriptor(
-        "org.apache.tez.mapreduce.task.InitialTask", null), 6);
-    Vertex reduce1Vertex = new Vertex("reduce1", new ProcessorDescriptor(
-        "org.apache.tez.mapreduce.task.IntermediateTask", null), 3);
-    Vertex reduce2Vertex = new Vertex("reduce2", new ProcessorDescriptor(
-        "org.apache.tez.mapreduce.task.FinalTask", null), 3);
-   Edge edge1 = new Edge(mapVertex, reduce1Vertex,
-       new EdgeProperty(ConnectionPattern.BIPARTITE,
-           SourceType.STABLE,
-           new OutputDescriptor(OnFileSortedOutput.class.getName(), null),
-           new InputDescriptor(ShuffledMergedInput.class.getName(), null)));
-   Edge edge2 = new Edge(reduce1Vertex, reduce2Vertex,
-       new EdgeProperty(ConnectionPattern.BIPARTITE,
-           SourceType.STABLE,
-           new OutputDescriptor(OnFileSortedOutput.class.getName(), null),
-           new InputDescriptor(ShuffledMergedInput.class.getName(), null)));
-   Map<String, LocalResource> jobRsrcs = createLocalResources(getMRRBaseDir(),
-       getMRRLocalRsrcList());
-
-   Map<String, LocalResource> mapRsrcs = new HashMap<String, LocalResource>();
-   Map<String, LocalResource> reduce1Rsrcs = new HashMap<String, LocalResource>();
-   Map<String, LocalResource> reduce2Rsrcs = new HashMap<String, LocalResource>();
-
-   mapRsrcs.put(MRJobConfig.JOB_SPLIT, jobRsrcs.get(MRJobConfig.JOB_SPLIT));
-   mapRsrcs.put(MRJobConfig.JOB_SPLIT_METAINFO, jobRsrcs.get(MRJobConfig.JOB_SPLIT_METAINFO));
-   mapRsrcs.put(MRJobConfig.JOB_JAR, jobRsrcs.get(MRJobConfig.JOB_JAR));
-   mapRsrcs.put(MRJobConfig.JOB_CONF_FILE, jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-   mapRsrcs.put(getConfFileName("map"), jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-
-   reduce1Rsrcs.put(MRJobConfig.JOB_JAR, jobRsrcs.get(MRJobConfig.JOB_JAR));
-   reduce1Rsrcs.put(MRJobConfig.JOB_CONF_FILE, jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-   reduce1Rsrcs.put(getConfFileName("reduce1"), jobRsrcs.get(getConfFileName("reduce1")));
-
-   reduce2Rsrcs.put(MRJobConfig.JOB_JAR, jobRsrcs.get(MRJobConfig.JOB_JAR));
-   reduce2Rsrcs.put(MRJobConfig.JOB_CONF_FILE, jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-   reduce2Rsrcs.put(getConfFileName("reduce2"), jobRsrcs.get(getConfFileName("reduce2")));
-
-    Resource mapResource = Resource.newInstance(
-        MRJobConfig.DEFAULT_MAP_MEMORY_MB,
-        MRJobConfig.DEFAULT_MAP_CPU_VCORES);
-   
-   mapVertex.setTaskResource(mapResource);
-   mapVertex.setTaskLocalResources(mapRsrcs);
-   Resource reduceResource = Resource.newInstance(
-       MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
-       MRJobConfig.DEFAULT_REDUCE_CPU_VCORES);
-   reduce1Vertex.setTaskResource(reduceResource);
-   reduce1Vertex.setTaskLocalResources(reduce1Rsrcs);
-
-   reduce1Vertex.setTaskResource(reduceResource);
-   reduce2Vertex.setTaskLocalResources(reduce2Rsrcs);
-
-   dag.addVertex(mapVertex);
-   dag.addVertex(reduce1Vertex);
-   dag.addVertex(reduce2Vertex);
-   dag.addEdge(edge1);
-   dag.addEdge(edge2);
-   dag.verify();
-//   dag.addConfiguration(MRJobConfig.MAP_SPECULATIVE, new Boolean(false).toString());
-//   dag.addConfiguration(MRJobConfig.REDUCE_SPECULATIVE, new Boolean(false).toString());
-   
-   DAGPlan dagPB = dag.createDag(new TezConfiguration());
-   return dagPB;
- }
-
- // TODO remove once client is in place
- static DAGPlan createDAGConfigurationForMR() throws IOException {
-   org.apache.tez.dag.api.DAG dag = new org.apache.tez.dag.api.DAG("examplemrjob");
-    Vertex mapVertex = new Vertex("map", new ProcessorDescriptor(
-        "org.apache.tez.mapreduce.task.InitialTask", null), 6);
-    Vertex reduceVertex = new Vertex("reduce", new ProcessorDescriptor(
-        "org.apache.tez.mapreduce.task.FinalTask", null), 1);
-   Edge edge = new Edge(mapVertex, reduceVertex,
-       new EdgeProperty(ConnectionPattern.BIPARTITE,
-           SourceType.STABLE,
-           new OutputDescriptor(OnFileSortedOutput.class.getName(), null),
-           new InputDescriptor(ShuffledMergedInput.class.getName(), null)));
-
-   Map<String, LocalResource> jobRsrcs = createLocalResources(getMRBaseDir(),
-       getMRLocalRsrcList());
-
-   Map<String, LocalResource> mapRsrcs = new HashMap<String, LocalResource>();
-   Map<String, LocalResource> reduceRsrcs = new HashMap<String, LocalResource>();
-
-   mapRsrcs.put(MRJobConfig.JOB_SPLIT, jobRsrcs.get(MRJobConfig.JOB_SPLIT));
-   mapRsrcs.put(MRJobConfig.JOB_SPLIT_METAINFO, jobRsrcs.get(MRJobConfig.JOB_SPLIT_METAINFO));
-   mapRsrcs.put(MRJobConfig.JOB_JAR, jobRsrcs.get(MRJobConfig.JOB_JAR));
-   mapRsrcs.put(MRJobConfig.JOB_CONF_FILE, jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-   mapRsrcs.put(getConfFileName("map"), jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-
-   reduceRsrcs.put(MRJobConfig.JOB_JAR, jobRsrcs.get(MRJobConfig.JOB_JAR));
-   reduceRsrcs.put(MRJobConfig.JOB_CONF_FILE, jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-   reduceRsrcs.put(getConfFileName("reduce"), jobRsrcs.get(MRJobConfig.JOB_CONF_FILE));
-
-   Resource mapResource = Resource.newInstance(
-        MRJobConfig.DEFAULT_MAP_MEMORY_MB,
-        MRJobConfig.DEFAULT_MAP_CPU_VCORES);
-   mapVertex.setTaskResource(mapResource);
-   mapVertex.setTaskLocalResources(mapRsrcs);
-   Resource reduceResource = Resource.newInstance(
-       MRJobConfig.DEFAULT_REDUCE_MEMORY_MB,
-       MRJobConfig.DEFAULT_REDUCE_CPU_VCORES);
-   reduceVertex.setTaskResource(reduceResource);
-   reduceVertex.setTaskLocalResources(reduceRsrcs);
-   dag.addVertex(mapVertex);
-   dag.addVertex(reduceVertex);
-   dag.addEdge(edge);
-   dag.verify();
-   
-//   dag.addConfiguration(MRJobConfig.MAP_SPECULATIVE, new Boolean(false).toString());
-//   dag.addConfiguration(MRJobConfig.REDUCE_SPECULATIVE, new Boolean(false).toString());
-   
-   DAGPlan dagPB = dag.createDag(new TezConfiguration());
-   
-   return dagPB;
- }
-  
-}


Mime
View raw message