hadoop-mapreduce-user mailing list archives

From psdc1978 <psdc1...@gmail.com>
Subject Run IsolationRunner class with wordcount example
Date Sat, 01 May 2010 19:18:03 GMT
Hi,

I'm trying to run the IsolationRunner class with the wordcount example. To do
that, I'm reusing the temporary data that was created during a normal
execution of the wordcount job. That execution left behind the following
directory structure:

hadoop@virtuakarmic:~/isolationrunner/hadoop-hadoop/mapred/local$ tree
.
|-- jobTracker
|   |-- job_201005012019_0002.jar
|   `-- job_201005012019_0002.xml
`-- taskTracker
    `-- jobcache
        `-- job_201005012019_0002
            |-- attempt_201005012019_0002_m_000000_0_0_m_0
            |   |-- job.xml
            |   |-- output
            |   |   |-- file.hash
            |   |   |-- file.out
            |   |   `-- file.out.index
            |   |-- pid
            |   `-- split.dta
            |-- attempt_201005012019_0002_m_000001_1_0_m_0
            |   |-- job.xml
            |   |-- output
            |   |   |-- file.hash
            |   |   |-- file.out
            |   |   `-- file.out.index
            |   |-- pid
            |   `-- split.dta
            |-- attempt_201005012019_0002_r_000000_0_0_r_0
            |   |-- job.xml
            |   |-- map_0.out
            |   |-- output
            |   |   |-- map_0.out
            |   |   `-- map_1.out
            |   |-- pid
            |   `-- work
            |       `-- tmp
            |-- jars
            |   |-- META-INF
            |   |   `-- MANIFEST.MF
            |   |-- job.jar
            |   `-- org
            |       `-- apache
            |           `-- hadoop
            |               `-- examples
            |                   |-- AggregateWordCount$WordCountPlugInClass.class
            |                   |-- AggregateWordCount.class
            |                   |-- AggregateWordHistogram$AggregateWordHistogramPlugin.class
            |                   |-- AggregateWordHistogram.class
            |                   |-- DBCountPageView$AccessRecord.class
            |                   |-- DBCountPageView$PageviewMapper.class
            |                   |-- DBCountPageView$PageviewRecord.class
            |                   |-- DBCountPageView$PageviewReducer.class
            |                   |-- DBCountPageView.class
            |                   |-- ExampleDriver.class
            |                   |-- Grep.class
            |                   |-- Join.class
            |                   |-- MultiFileWordCount$MapClass.class
            |                   |-- MultiFileWordCount$MultiFileLineRecordReader.class
            |                   |-- MultiFileWordCount$MyInputFormat.class
            |                   |-- MultiFileWordCount$WordOffset.class
            |                   |-- MultiFileWordCount.class
            |                   |-- PiEstimator$HaltonSequence.class
            |                   |-- PiEstimator$PiMapper.class
            |                   |-- PiEstimator$PiReducer.class
            |                   |-- PiEstimator.class
            |                   |-- RandomTextWriter$Counters.class
            |                   |-- RandomTextWriter$Map.class
            |                   |-- RandomTextWriter.class
            |                   |-- RandomWriter$Counters.class
            |                   |-- RandomWriter$Map.class
            |                   |-- RandomWriter$RandomInputFormat$RandomRecordReader.class
            |                   |-- RandomWriter$RandomInputFormat.class
            |                   |-- RandomWriter.class
            |                   |-- SecondarySort$FirstGroupingComparator.class
            |                   |-- SecondarySort$FirstPartitioner.class
            |                   |-- SecondarySort$IntPair$Comparator.class
            |                   |-- SecondarySort$IntPair.class
            |                   |-- SecondarySort$MapClass.class
            |                   |-- SecondarySort$Reduce.class
            |                   |-- SecondarySort.class
            |                   |-- SleepJob$EmptySplit.class
            |                   |-- SleepJob$SleepInputFormat$1.class
            |                   |-- SleepJob$SleepInputFormat.class
            |                   |-- SleepJob.class
            |                   |-- Sort.class
            |                   |-- WordCount$IntSumReducer.class
            |                   |-- WordCount$TokenizerMapper.class
            |                   |-- WordCount.class
            |                   |-- dancing
            |                   |   |-- DancingLinks$ColumnHeader.class
            |                   |   |-- DancingLinks$Node.class
            |                   |   |-- DancingLinks$SolutionAcceptor.class
            |                   |   |-- DancingLinks.class
            |                   |   |-- DistributedPentomino$PentMap$SolutionCatcher.class
            |                   |   |-- DistributedPentomino$PentMap.class
            |                   |   |-- DistributedPentomino.class
            |                   |   |-- OneSidedPentomino.class
            |                   |   |-- Pentomino$ColumnName.class
            |                   |   |-- Pentomino$Piece.class
            |                   |   |-- Pentomino$Point.class
            |                   |   |-- Pentomino$SolutionCategory.class
            |                   |   |-- Pentomino$SolutionPrinter.class
            |                   |   |-- Pentomino.class
            |                   |   |-- Sudoku$CellConstraint.class
            |                   |   |-- Sudoku$ColumnConstraint.class
            |                   |   |-- Sudoku$ColumnName.class
            |                   |   |-- Sudoku$RowConstraint.class
            |                   |   |-- Sudoku$SolutionPrinter.class
            |                   |   |-- Sudoku$SquareConstraint.class
            |                   |   `-- Sudoku.class
            |                   `-- terasort
            |                       |-- TeraGen$RandomGenerator.class
            |                       |-- TeraGen$RangeInputFormat$RangeInputSplit.class
            |                       |-- TeraGen$RangeInputFormat$RangeRecordReader.class
            |                       |-- TeraGen$RangeInputFormat.class
            |                       |-- TeraGen$SortGenMapper.class
            |                       |-- TeraGen.class
            |                       |-- TeraInputFormat$TeraRecordReader.class
            |                       |-- TeraInputFormat$TextSampler.class
            |                       |-- TeraInputFormat.class
            |                       |-- TeraOutputFormat$TeraRecordWriter.class
            |                       |-- TeraOutputFormat.class
            |                       |-- TeraSort$TotalOrderPartitioner$InnerTrieNode.class
            |                       |-- TeraSort$TotalOrderPartitioner$LeafTrieNode.class
            |                       |-- TeraSort$TotalOrderPartitioner$TrieNode.class
            |                       |-- TeraSort$TotalOrderPartitioner.class
            |                       |-- TeraSort.class
            |                       |-- TeraValidate$ValidateMapper.class
            |                       |-- TeraValidate$ValidateReducer.class
            |                       `-- TeraValidate.class
            |-- job.xml
            `-- work

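(Side note: if I understand the tutorial correctly, these per-task files are
only preserved for IsolationRunner when keep.failed.task.files is set to true
at submission time. A minimal sketch of a submission with that flag — the
examples jar name is just how it's called in my install:

hadoop@virtuakarmic:~/hadoop-0.20.1_bkp$ bin/hadoop jar hadoop-0.20.1-examples.jar wordcount \
    -D keep.failed.task.files=true gutenberg gutenberg-output

The wordcount driver goes through GenericOptionsParser, so the -D option
should be picked up.)
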
To reuse this structure, I'm running IsolationRunner with the following
argument:
/home/hadoop/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/attempt_201005012019_0002_r_000000_0_0_r_0/job.xml

This argument is the path to the job.xml of a reduce task attempt.
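
For comparison, the IsolationRunner section of the MapReduce tutorial, as far
as I can tell, says to cd into the attempt's work directory and point one
level up at job.xml, roughly:

$ cd <local dir>/taskTracker/jobcache/job_201005012019_0002/attempt_201005012019_0002_r_000000_0_0_r_0/work
$ ~/hadoop-0.20.1_bkp/bin/hadoop org.apache.hadoop.mapred.IsolationRunner ../job.xml

I mention it in case the working directory matters for how relative paths in
the configuration resolve.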

I run the IsolationRunner with the following command:
hadoop@virtuakarmic:~/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/attempt_201005012019_0002_r_000000_0_0_r_0$ ~/hadoop-0.20.1_bkp/bin/hadoop org.apache.hadoop.mapred.IsolationRunner ./job.xml

The problem is that the wordcount combiner class can't be found. The error I
get is:

hadoop@virtuakarmic:~/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/attempt_201005012019_0002_r_000000_0_0_r_0$ ~/hadoop-0.20.1_bkp/bin/hadoop org.apache.hadoop.mapred.IsolationRunner ./job.xml
10/05/01 21:15:35 INFO mortbay.log: Logging to org.slf4j.impl.Log4jLoggerAdapter(org.mortbay.log) via org.mortbay.log.Slf4jLog
10/05/01 21:15:35 INFO mortbay.log: forName: attempt_201005012019_0002_r_000000_0_0_r_0
10/05/01 21:15:35 INFO mapred.ReduceTask:393 ReduceCopier created for job_201005012019_0002
Exception in thread "main" java.lang.RuntimeException: java.lang.ClassNotFoundException: org.apache.hadoop.examples.WordCount$IntSumReducer
        at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:808)
        at org.apache.hadoop.mapreduce.JobContext.getCombinerClass(JobContext.java:169)
        at org.apache.hadoop.mapred.Task$CombinerRunner.create(Task.java:1159)
        at org.apache.hadoop.mapred.ReduceTask$ReduceCopier.<init>(ReduceTask.java:2157)
        at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:395)
        at org.apache.hadoop.mapred.IsolationRunner.main(IsolationRunner.java:218)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.examples.WordCount$IntSumReducer
        at java.net.URLClassLoader$1.run(URLClassLoader.java:200)
        at java.security.AccessController.doPrivileged(Native Method)
        at java.net.URLClassLoader.findClass(URLClassLoader.java:188)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:307)
        at java.lang.ClassLoader.loadClass(ClassLoader.java:252)
        at java.lang.ClassLoader.loadClassInternal(ClassLoader.java:320)
        at java.lang.Class.forName0(Native Method)
        at java.lang.Class.forName(Class.java:247)
        at org.apache.hadoop.conf.Configuration.getClassByName(Configuration.java:761)
        at org.apache.hadoop.conf.Configuration.getClass(Configuration.java:806)
        ... 5 more
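
From the stack trace, it looks like the job's own classes simply aren't on
IsolationRunner's classpath, even though mapred.jar points at the job.jar in
the jobcache. One workaround I'm considering (untested; the path is just the
job.jar from my tree above) is to export it via HADOOP_CLASSPATH before
launching, since bin/hadoop adds that variable to the JVM classpath:

$ export HADOOP_CLASSPATH=/home/hadoop/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/jars/job.jar
$ ~/hadoop-0.20.1_bkp/bin/hadoop org.apache.hadoop.mapred.IsolationRunner ./job.xml

With that in place, WordCount$IntSumReducer should be resolvable, but I
haven't confirmed it yet.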



I'll paste the job.xml file below, in case anyone wants to see the parameters:
hadoop@virtuakarmic:~/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/attempt_201005012019_0002_r_000000_0_0_r_0$ cat job.xml
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<configuration>

<property><name>fs.s3n.impl</name><value>org.apache.hadoop.fs.s3native.NativeS3FileSystem</value></property>

<property><name>mapred.task.is.map</name><value>false</value></property>

<property><name>mapred.task.cache.levels</name><value>2</value></property>

<property><name>hadoop.tmp.dir</name><value>/tmp/dir/hadoop-${user.name}</value></property>

<property><name>hadoop.native.lib</name><value>true</value></property>

<property><name>map.sort.class</name><value>org.apache.hadoop.util.QuickSort</value></property>

<property><name>ipc.client.idlethreshold</name><value>4000</value></property>

<property><name>mapred.system.dir</name><value>${hadoop.tmp.dir}/mapred/system</value></property>

<property><name>mapred.job.tracker.persist.jobstatus.hours</name><value>0</value></property>

<property><name>io.skip.checksum.errors</name><value>false</value></property>

<property><name>mapred.task.id</name><value>attempt_201005012019_0002_r_000000_0_0_r_0</value></property>

<property><name>fs.default.name</name><value>file:///</value></property>

<property><name>mapred.reducer.new-api</name><value>true</value></property>

<property><name>mapred.child.tmp</name><value>./tmp</value></property>

<property><name>mapred.skip.reduce.max.skip.groups</name><value>0</value></property>

<property><name>mapred.jobtracker.instrumentation</name><value>org.apache.hadoop.mapred.JobTrackerMetricsInst</value></property>

<property><name>mapred.tasktracker.dns.nameserver</name><value>default</value></property>

<property><name>io.sort.factor</name><value>10</value></property>

<property><name>mapred.task.timeout</name><value>600000</value></property>

<property><name>mapred.max.tracker.failures</name><value>4</value></property>

<property><name>hadoop.rpc.socket.factory.class.default</name><value>org.apache.hadoop.net.StandardSocketFactory</value></property>

<property><name>fs.hdfs.impl</name><value>org.apache.hadoop.hdfs.DistributedFileSystem</value></property>

<property><name>mapred.queue.default.acl-administer-jobs</name><value>*</value></property>

<property><name>mapred.queue.default.acl-submit-job</name><value>*</value></property>

<property><name>mapred.output.key.class</name><value>org.apache.hadoop.io.Text</value></property>

<property><name>mapred.skip.map.auto.incr.proc.count</name><value>true</value></property>

<property><name>io.mapfile.bloom.size</name><value>1048576</value></property>

<property><name>tasktracker.http.threads</name><value>40</value></property>

<property><name>mapred.job.shuffle.merge.percent</name><value>0.66</value></property>

<property><name>fs.ftp.impl</name><value>org.apache.hadoop.fs.ftp.FTPFileSystem</value></property>

<property><name>user.name</name><value>hadoop</value></property>

<property><name>mapred.output.compress</name><value>false</value></property>

<property><name>io.bytes.per.checksum</name><value>512</value></property>

<property><name>topology.node.switch.mapping.impl</name><value>org.apache.hadoop.net.ScriptBasedMapping</value></property>

<property><name>mapred.reduce.slowstart.completed.maps</name><value>0.05</value></property>

<property><name>mapred.reduce.max.attempts</name><value>4</value></property>

<property><name>fs.ramfs.impl</name><value>org.apache.hadoop.fs.InMemoryFileSystem</value></property>

<property><name>mapred.skip.map.max.skip.records</name><value>0</value></property>

<property><name>mapred.job.tracker.persist.jobstatus.dir</name><value>/jobtracker/jobsInfo</value></property>

<property><name>hadoop.job.ugi</name><value>hadoop,adm,dialout,fax,cdrom,floppy,audio,video,plugdev,syslog,lpadmin,virtualbox</value></property>

<property><name>mapred.jar</name><value>/home/hadoop/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/jars/job.jar</value></property>
<property><name>fs.s3.buffer.dir</name><value>${hadoop.tmp.dir}/s3</value></property>

<property><name>job.end.retry.attempts</name><value>0</value></property>

<property><name>fs.file.impl</name><value>org.apache.hadoop.fs.LocalFileSystem</value></property>

<property><name>mapred.local.dir.minspacestart</name><value>0</value></property>

<property><name>mapred.output.compression.type</name><value>RECORD</value></property>

<property><name>topology.script.number.args</name><value>100</value></property>

<property><name>io.mapfile.bloom.error.rate</name><value>0.005</value></property>

<property><name>mapred.max.tracker.blacklists</name><value>4</value></property>

<property><name>mapred.task.profile.maps</name><value>0-2</value></property>

<property><name>mapred.userlog.retain.hours</name><value>24</value></property>

<property><name>mapred.job.tracker.persist.jobstatus.active</name><value>false</value></property>

<property><name>hadoop.security.authorization</name><value>false</value></property>

<property><name>local.cache.size</name><value>10737418240</value></property>

<property><name>mapred.min.split.size</name><value>0</value></property>

<property><name>mapred.map.tasks</name><value>2</value></property>

<property><name>mapred.child.java.opts</name><value>-Xmx200m</value></property>

<property><name>mapred.output.value.class</name><value>org.apache.hadoop.io.IntWritable</value></property>

<property><name>mapred.job.queue.name</name><value>default</value></property>

<property><name>ipc.server.listen.queue.size</name><value>128</value></property>

<property><name>group.name</name><value>adm</value></property>

<property><name>mapred.inmem.merge.threshold</name><value>1000</value></property>

<property><name>job.end.retry.interval</name><value>30000</value></property>

<property><name>mapred.skip.attempts.to.start.skipping</name><value>2</value></property>

<property><name>fs.checkpoint.dir</name><value>${hadoop.tmp.dir}/dfs/namesecondary</value></property>

<property><name>mapred.reduce.tasks</name><value>1</value></property>

<property><name>mapred.merge.recordsBeforeProgress</name><value>10000</value></property>

<property><name>mapred.userlog.limit.kb</name><value>0</value></property>

<property><name>webinterface.private.actions</name><value>false</value></property>

<property><name>io.sort.spill.percent</name><value>0.80</value></property>

<property><name>mapred.job.shuffle.input.buffer.percent</name><value>0.70</value></property>

<property><name>mapred.job.split.file</name><value>file:/tmp/dir/hadoop-hadoop/mapred/system/job_201005012019_0002/job.split</value></property>

<property><name>mapred.map.tasks.speculative.execution</name><value>true</value></property>

<property><name>mapred.job.name</name><value>word count</value></property>

<property><name>hadoop.util.hash.type</name><value>murmur</value></property>

<property><name>mapred.map.max.attempts</name><value>4</value></property>

<property><name>mapred.job.tracker.handler.count</name><value>10</value></property>

<property><name>mapred.tasktracker.expiry.interval</name><value>600000</value></property>

<property><name>mapred.jobtracker.maxtasks.per.job</name><value>-1</value></property>

<property><name>mapred.jobtracker.job.history.block.size</name><value>3145728</value></property>

<property><name>keep.failed.task.files</name><value>false</value></property>

<property><name>mapred.faulty.replica</name><value>1</value></property>

<property><name>ipc.client.tcpnodelay</name><value>false</value></property>

<property><name>mapred.task.profile.reduces</name><value>0-2</value></property>

<property><name>mapred.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>

<property><name>io.map.index.skip</name><value>0</value></property>

<property><name>mapred.working.dir</name><value>file:/home/hadoop/hadoop-0.20.1_bkp</value></property>

<property><name>ipc.server.tcpnodelay</name><value>false</value></property>

<property><name>mapred.used.genericoptionsparser</name><value>true</value></property>

<property><name>mapred.mapper.new-api</name><value>true</value></property>

<property><name>hadoop.logfile.size</name><value>10000000</value></property>

<property><name>mapred.reduce.tasks.speculative.execution</name><value>true</value></property>

<property><name>job.local.dir</name><value>/home/hadoop/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/work</value></property>

<property><name>fs.checkpoint.period</name><value>3600</value></property>

<property><name>mapred.job.reuse.jvm.num.tasks</name><value>1</value></property>

<property><name>mapred.jobtracker.completeuserjobs.maximum</name><value>100</value></property>

<property><name>mapred.job.id</name><value>job_201005012019_0002</value></property>

<property><name>fs.s3.maxRetries</name><value>4</value></property>

<property><name>mapred.task.partition</name><value>0</value></property>

<property><name>keep.failed.tasks.files</name><value>true</value></property>

<property><name>mapred.local.dir</name><value>/home/hadoop/isolationrunner/hadoop-hadoop/mapred/local</value></property>

<property><name>fs.hftp.impl</name><value>org.apache.hadoop.hdfs.HftpFileSystem</value></property>

<property><name>fs.trash.interval</name><value>0</value></property>

<property><name>fs.s3.sleepTimeSeconds</name><value>10</value></property>

<property><name>mapred.submit.replication</name><value>10</value></property>

<property><name>fs.har.impl</name><value>org.apache.hadoop.fs.HarFileSystem</value></property>

<property><name>mapred.map.output.compression.codec</name><value>org.apache.hadoop.io.compress.DefaultCodec</value></property>

<property><name>mapred.tasktracker.dns.interface</name><value>default</value></property>

<property><name>mapred.job.tracker</name><value>localhost:54311</value></property>

<property><name>io.seqfile.sorter.recordlimit</name><value>1000000</value></property>

<property><name>mapred.line.input.format.linespermap</name><value>1</value></property>

<property><name>mapred.jobtracker.taskScheduler</name><value>org.apache.hadoop.mapred.JobQueueTaskScheduler</value></property>

<property><name>mapred.tasktracker.instrumentation</name><value>org.apache.hadoop.mapred.TaskTrackerMetricsInst</value></property>

<property><name>mapred.tasktracker.procfsbasedprocesstree.sleeptime-before-sigkill</name><value>5000</value></property>

<property><name>mapred.local.dir.minspacekill</name><value>0</value></property>

<property><name>mapred.map.replica</name><value>1</value></property>

<property><name>io.sort.record.percent</name><value>0.05</value></property>

<property><name>mapreduce.reduce.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value></property>

<property><name>fs.kfs.impl</name><value>org.apache.hadoop.fs.kfs.KosmosFileSystem</value></property>

<property><name>mapred.temp.dir</name><value>${hadoop.tmp.dir}/mapred/temp</value></property>

<property><name>mapred.tasktracker.reduce.tasks.maximum</name><value>2</value></property>

<property><name>fs.checkpoint.edits.dir</name><value>${fs.checkpoint.dir}</value></property>

<property><name>mapred.job.reduce.input.buffer.percent</name><value>0.0</value></property>

<property><name>mapred.tasktracker.indexcache.mb</name><value>10</value></property>
<property><name>hadoop.logfile.count</name><value>10</value></property>
<property><name>mapred.skip.reduce.auto.incr.proc.count</name><value>true</value></property>
<property><name>io.seqfile.compress.blocksize</name><value>1000000</value></property>
<property><name>fs.s3.block.size</name><value>67108864</value></property>
<property><name>mapred.tasktracker.taskmemorymanager.monitoring-interval</name><value>5000</value></property>
<property><name>mapred.acls.enabled</name><value>false</value></property>
<property><name>mapred.queue.names</name><value>default</value></property>
<property><name>fs.hsftp.impl</name><value>org.apache.hadoop.hdfs.HsftpFileSystem</value></property>
<property><name>mapred.task.tracker.http.address</name><value>0.0.0.0:50060</value></property>
<property><name>mapreduce.combine.class</name><value>org.apache.hadoop.examples.WordCount$IntSumReducer</value></property>
<property><name>mapred.reduce.parallel.copies</name><value>5</value></property>
<property><name>io.seqfile.lazydecompress</name><value>true</value></property>
<property><name>mapred.output.dir</name><value>gutenberg-output</value></property>
<property><name>io.sort.mb</name><value>100</value></property>
<property><name>ipc.client.connection.maxidletime</name><value>10000</value></property>
<property><name>mapred.compress.map.output</name><value>false</value></property>
<property><name>mapred.task.tracker.report.address</name><value>127.0.0.1:0</value></property>
<property><name>ipc.client.kill.max</name><value>10</value></property>
<property><name>ipc.client.connect.max.retries</name><value>10</value></property>
<property><name>mapreduce.map.class</name><value>org.apache.hadoop.examples.WordCount$TokenizerMapper</value></property>
<property><name>fs.s3.impl</name><value>org.apache.hadoop.fs.s3.S3FileSystem</value></property>
<property><name>mapred.input.dir</name><value>file:/home/hadoop/hadoop-0.20.1_bkp/gutenberg</value></property>
<property><name>mapred.job.tracker.http.address</name><value>0.0.0.0:50030</value></property>
<property><name>io.file.buffer.size</name><value>4096</value></property>
<property><name>mapred.jobtracker.restart.recover</name><value>false</value></property>
<property><name>mapred.tip.id</name><value>task_201005012019_0002_r_000000_0_0_r</value></property>
<property><name>io.serializations</name><value>org.apache.hadoop.io.serializer.WritableSerialization</value></property>
<property><name>mapred.reduce.copy.backoff</name><value>300</value></property>
<property><name>mapred.task.profile</name><value>false</value></property>
<property><name>jobclient.output.filter</name><value>ALL</value></property>
<property><name>mapred.tasktracker.map.tasks.maximum</name><value>2</value></property>
<property><name>io.compression.codecs</name><value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec</value></property>
<property><name>fs.checkpoint.size</name><value>67108864</value></property>
</configuration>
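
For what it's worth, mapreduce.combine.class (and mapreduce.reduce.class) is
org.apache.hadoop.examples.WordCount$IntSumReducer, which does appear under
the unpacked jars directory in the tree above. A quick sanity check that the
class is also inside the job jar itself:

$ cd /home/hadoop/isolationrunner/hadoop-hadoop/mapred/local/taskTracker/jobcache/job_201005012019_0002/jars
$ jar tf job.jar | grep IntSumReducer

Given that the unpacked copy is visible in the tree, this should list the
class, so it seems to exist on disk; IsolationRunner just doesn't load it.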



So my question is: how can I run IsolationRunner with the wordcount example?

Thanks,
PSC
