hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From unmesha sreeveni <unmeshab...@gmail.com>
Subject Custom FileInputFormat not able to pass value to mapper
Date Wed, 11 Jun 2014 10:15:34 GMT
Below is my code. What my code does is

I stored the entire file's contents via MyFileInputFormat, and from that I extracted
the lines I want. Basically, each extracted value is the run of lines from the start
of a record up to the line beginning with 6000.

*public class MyFileInputFormat extends FileInputFormat<Text, Text> {*

*@Override*
* public RecordReader<Text, Text> createRecordReader(InputSplit split,*
*   TaskAttemptContext context) {*
* return new MyWholeFileReader();*
* }*

* @Override*
* protected boolean isSplitable(JobContext context, Path file) {*
* System.out.println("isSplitable");*
* return false;*

* }*


 * public static class MyWholeFileReader extends RecordReader<Text, Text> {*

* private CompressionCodecFactory compressionCodecs = null;*
* private long start;*
* private long end;*
* private long pos;*
* private LineReader in;*
* int counter = 0;*
* int i = 0;*
* String header = null;*
* int headerIndex = 0;*
* int footerIndex = 0;*
* private Text key = null;*
* private Text value = null;*
* private Text buffer = new Text();*
* StringBuilder sb = new StringBuilder();*

* public void initialize(InputSplit genericSplit,*
* TaskAttemptContext context) throws IOException {*

* FileSplit split = (FileSplit) genericSplit;*
* Configuration job = context.getConfiguration();*
* start = split.getStart();*
* this.end = start + split.getLength();*
* this.pos = start;*
* final Path file = split.getPath();*
* compressionCodecs = new CompressionCodecFactory(job);*
* final CompressionCodec codec = compressionCodecs.getCodec(file);*
* FileSystem fs = file.getFileSystem(job);*
* FSDataInputStream fileIn = fs.open(split.getPath());*
* if (codec != null) {*
* in = new LineReader(codec.createInputStream(fileIn), job);*
* }*
* else {*
* in = new LineReader(fileIn, job);*
* }*
* if (key == null) {*
* key = new Text();*
* }*
* key.set(split.getPath().getName());*
* if (value == null) {*
* value = new Text();*
* }*
* }*
* public boolean nextKeyValue() throws IOException {*
* System.out.println("nextKeyValue");*
* int itr = 0;*

* if(itr == 0){*
* int newSize = 0;*
* newSize = in.readLine(buffer);*
* int index = 0;*
* while (newSize > 0) {*
* String str = buffer.toString();*
* sb.append(str);*
* sb.append("\n");*
* newSize = in.readLine(buffer);*
* if(sb.toString().contains("6000")){*
* counter ++;*
* }*
* }*
* }*
* /**
* * Loop through string builder*
* */*
* String[] lines = sb.toString().split("\\n");*
* for(String s: lines){*
 * if(s.contains("^6000")){*
* i++;*
* }*
* }*
* /**
* * differentiating header,body and footer*
* */*
* String[] lines1 = sb.toString().split("\\n");*
* StringBuilder temp = new StringBuilder();*
* for(String getVal: lines1){*
* if(getVal.startsWith("6000")){*
* temp.append(getVal);*
* i --;*
* break;*
* }*
* else{*
* temp.append(getVal);*
* }*
* temp.append("\n");*
 * }*
* System.out.println("temp = " + temp.toString());*
* value.set(temp.toString());*
* sb.delete(0, temp.toString().length());*
* /**
* * Stopping condition*
* */*
* if(i == -1){*
*                                 sb.delete(0, sb.length());*
* }*
* if (sb.length() == 0) {*
* key = null;*
* value = null;*
* return false;*
* }*
* else {*
* value.set(temp.toString());*
* return true;*
* }*
* }*
* @Override*
* public Text getCurrentKey() {*
* return key;*
* }*
* @Override*
* public Text getCurrentValue() {*
* return value;*
* }*
* /***
* * *
* * Get the progress within the split*
* */*

* public float getProgress() {*
* return 0.0f;*
* }*
* public synchronized void close() throws IOException {*
* if (in != null) {*
* in.close();*
* }*
* }*
* }*
*}*
I am able to get my desired value in MyFileInputFormat, but these values are
not reaching the Mapper. The value I want delivered to the mapper is in
temp.toString(), but it never arrives in my map() method.

Am I doing anything wrong?

Please suggest.


-- 
*Thanks & Regards *


*Unmesha Sreeveni U.B*
*Hadoop, Bigdata Developer*
*Center for Cyber Security | Amrita Vishwa Vidyapeetham*
http://www.unmeshasreeveni.blogspot.in/

Mime
View raw message