hadoop-mapreduce-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Shahab Yunus <shahab.yu...@gmail.com>
Subject Re: hadoop data structures
Date Tue, 09 Dec 2014 13:35:16 GMT
Are you asking about the type for the numberOfRuns variable which you are
declaring as a Java primitive int?

If yes, then you can use IntWritable class in Hadoop to define a integer
variable which will work with M/R

Regards,
Shahab

On Tue, Dec 9, 2014 at 3:47 AM, steven <commercials24@yahoo.de> wrote:

>  hi,
>
>
> i got this code which extracts timeframes frome a logfile and does some
> calculation on it.
> input lines looks like this:
>
> 1000,T,0,104,1000,1100,27147,80,80,80,80,81,81,98,98,98,101,137,137,139,177,177,177,173,166,149,134,130,124,119,111,104,92
>
> 1000,T,1,743,300,300,4976,492,492,492,492,492,497,497,856,856,863,866,875,875,954,954,954,954,954,954,954,954,770,770,770,770,743
>
> 1000,T,2,40,800,1000,11922,29,29,29,29,29,29,29,44,46,46,50,51,51,65,65,65,61,52,47,47,47,44,42,40,32,30
>
> 2001,T,0,103,6700,7000,44658,80,80,80,80,80,81,96,98,98,101,134,137,139,220,192,176,168,162,156,149,144,132,122,112,104,95
>
> 1002,U,....
>
>
> the first value being the time in ms,
> T being the lines im interrested in
> 0,1,2 being a product ID,
> 104,743,40,103 being the price i want.
>
>
> now i need to extract all prices for some specific timeframe, lets say
> 3000ms.
> the code at the end works but has the problem that the variable
> "numberOfRuns" is counted up and used to calculate the time and i guess
> using this system in hadoop doesnt work.
> so i need a way to extract the "timeframes" in the mapper and what data
> structure would you use?
>
>
>
>
>
>
> import java.io.BufferedReader;
> import java.io.FileNotFoundException;
> import java.io.FileReader;
> import java.io.IOException;
> import java.util.ArrayList;
>
> import java.util.List;
>
> public class Test {
>
>     public List<ArrayList<String>> splitFileByTime(List<String> lines,
int
> timeFrame) {
>         List<ArrayList<String>> myTimes = new
> ArrayList<ArrayList<String>>();
>
>
>         ArrayList<String> lines_new = new ArrayList<String>();
>
>
>         for (String z: lines) {
>             //System.out.println(z);
>         }
>
>         int numberOfRuns = 1;
>
>         for (String current : lines) {
>             String[] parts = current.split(",");
>
>             int time = Integer.parseInt(parts[0]);
>
>
>             if (time < 0) {
>                 // Zeiten vor Beginn der Simulation, uninteressant
>             } else {
>
>
>
>                 if (parts[1].contains("T")) {
>
>                     lines_new.add(current);
>                 }
>                 else {
>
>                 }
>                 if (time >= timeFrame * numberOfRuns) {
>                     numberOfRuns++;
>                     myTimes.add(lines_new);
>
>
>                     lines_new = new ArrayList<String>();
>                 }
>
>             }
>         }
>         return myTimes;
>     }
>
>
>
>     public void getOpenAndClose(List<ArrayList<String>> lines) {
>
>         int abschnitt = 1;
>         for (ArrayList<String> x: lines) {
>             System.out.println("Abschnitt: " + abschnitt);
>             List<Integer> tmp = new ArrayList<Integer>();
>             int high = 0;
>             int low = 10000;
>             for (String y:x) {
>
>                 String[] parts = y.split(",");
>                 if (parts[2].contains("0")) {
>                     int kurs = Integer.parseInt(parts[3]);
>                     if (kurs > high) {
>                         high = kurs;
>                     }
>
>                     if (kurs < low) {
>                         low = kurs;
>                     }
>                     System.out.println("Produkt: " + parts[2] + " wurde um
> " + parts[0] + " gehandelt mit kurs: " + kurs );
>                     tmp.add(kurs);
>
>
>                 }
>
>             }
>             System.out.println("open: " + tmp.get(0));
>             System.out.println("close: " + tmp.get(tmp.size()-1));
>             System.out.println("high: " + high);
>             System.out.println("low: " + low);
>             abschnitt++;
>         }
>
>     }
>
>
>     public List<String> readFile(String filename) {
>
>         List<String> lines = new ArrayList<String>();
>
>
>         BufferedReader reader = null;
>         try {
>             reader = new BufferedReader(new FileReader(filename));
>         } catch (FileNotFoundException e1) {
>             e1.printStackTrace();
>         }
>
>
>         String line;
>
>         try {
>             while ((line = reader.readLine()) != null) {
>                 lines.add(line);
>
>             }
>         } catch (IOException e) {
>             e.printStackTrace();
>         }
>
>         try {
>             reader.close();
>         } catch (IOException e) {
>             // TODO Auto-generated catch block
>             e.printStackTrace();
>         }
>
>
>         return lines;
>     }
>
>
>
>
>     public static void main(String[] args) {
>         //String filename = "Standard-2014-04-29-12-04.csv";
>         String filename = "Standard-small.txt";
>         //Zeitspanne für Zeilen in Millisekunden
>         int timeFrame = 3000;
>
>         Test x = new Test();
>
>         List<String> lines = x.readFile(filename);
>         List<ArrayList<String>> lines_split = x.splitFileByTime(lines,
> timeFrame);
>
>
>
>         x.getOpenAndClose(lines_split);
>
>     }
> }
>
>

Mime
View raw message