hadoop-hdfs-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From steven <commercial...@yahoo.de>
Subject hadoop data structures
Date Tue, 09 Dec 2014 08:47:05 GMT
hi,


i got this code which extracts timeframes frome a logfile and does some 
calculation on it.
input lines looks like this:

1000,T,0,104,1000,1100,27147,80,80,80,80,81,81,98,98,98,101,137,137,139,177,177,177,173,166,149,134,130,124,119,111,104,92


1000,T,1,743,300,300,4976,492,492,492,492,492,497,497,856,856,863,866,875,875,954,954,954,954,954,954,954,954,770,770,770,770,743


1000,T,2,40,800,1000,11922,29,29,29,29,29,29,29,44,46,46,50,51,51,65,65,65,61,52,47,47,47,44,42,40,32,30


2001,T,0,103,6700,7000,44658,80,80,80,80,80,81,96,98,98,101,134,137,139,220,192,176,168,162,156,149,144,132,122,112,104,95


1002,U,....


the first value being the time in ms,
T being the lines im interrested in
0,1,2 being a product ID,
104,743,40,103 being the price i want.


now i need to extract all prices for some specific timeframe, lets say 
3000ms.
the code at the end works but has the problem that the variable 
"numberOfRuns" is counted up and used to calculate the time and i guess 
using this system in hadoop doesnt work.
so i need a way to extract the "timeframes" in the mapper and what data 
structure would you use?






import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;

import java.util.List;

public class Test {

     public List<ArrayList<String>> splitFileByTime(List<String> lines,

int timeFrame) {
         List<ArrayList<String>> myTimes = new 
ArrayList<ArrayList<String>>();


         ArrayList<String> lines_new = new ArrayList<String>();


         for (String z: lines) {
             //System.out.println(z);
         }

         int numberOfRuns = 1;

         for (String current : lines) {
             String[] parts = current.split(",");

             int time = Integer.parseInt(parts[0]);


             if (time < 0) {
                 // Zeiten vor Beginn der Simulation, uninteressant
             } else {



                 if (parts[1].contains("T")) {

                     lines_new.add(current);
                 }
                 else {

                 }
                 if (time >= timeFrame * numberOfRuns) {
                     numberOfRuns++;
                     myTimes.add(lines_new);


                     lines_new = new ArrayList<String>();
                 }

             }
         }
         return myTimes;
     }



     public void getOpenAndClose(List<ArrayList<String>> lines) {

         int abschnitt = 1;
         for (ArrayList<String> x: lines) {
             System.out.println("Abschnitt: " + abschnitt);
             List<Integer> tmp = new ArrayList<Integer>();
             int high = 0;
             int low = 10000;
             for (String y:x) {

                 String[] parts = y.split(",");
                 if (parts[2].contains("0")) {
                     int kurs = Integer.parseInt(parts[3]);
                     if (kurs > high) {
                         high = kurs;
                     }

                     if (kurs < low) {
                         low = kurs;
                     }
                     System.out.println("Produkt: " + parts[2] + " wurde 
um " + parts[0] + " gehandelt mit kurs: " + kurs );
                     tmp.add(kurs);


                 }

             }
             System.out.println("open: " + tmp.get(0));
             System.out.println("close: " + tmp.get(tmp.size()-1));
             System.out.println("high: " + high);
             System.out.println("low: " + low);
             abschnitt++;
         }

     }


     public List<String> readFile(String filename) {

         List<String> lines = new ArrayList<String>();


         BufferedReader reader = null;
         try {
             reader = new BufferedReader(new FileReader(filename));
         } catch (FileNotFoundException e1) {
             e1.printStackTrace();
         }


         String line;

         try {
             while ((line = reader.readLine()) != null) {
                 lines.add(line);

             }
         } catch (IOException e) {
             e.printStackTrace();
         }

         try {
             reader.close();
         } catch (IOException e) {
             // TODO Auto-generated catch block
             e.printStackTrace();
         }


         return lines;
     }




     public static void main(String[] args) {
         //String filename = "Standard-2014-04-29-12-04.csv";
         String filename = "Standard-small.txt";
         //Zeitspanne für Zeilen in Millisekunden
         int timeFrame = 3000;

         Test x = new Test();

         List<String> lines = x.readFile(filename);
         List<ArrayList<String>> lines_split = x.splitFileByTime(lines, 
timeFrame);



         x.getOpenAndClose(lines_split);

     }
}


Mime
View raw message