hbase-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Nick Dimiduk (JIRA)" <j...@apache.org>
Subject [jira] [Updated] (HBASE-10385) ImportTsv to parse date time from typical loader formats
Date Mon, 03 Feb 2014 21:49:06 GMT

     [ https://issues.apache.org/jira/browse/HBASE-10385?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Nick Dimiduk updated HBASE-10385:
---------------------------------

    Status: Patch Available  (was: Open)

> ImportTsv to parse date time from typical loader formats
> --------------------------------------------------------
>
>                 Key: HBASE-10385
>                 URL: https://issues.apache.org/jira/browse/HBASE-10385
>             Project: HBase
>          Issue Type: New Feature
>          Components: mapreduce
>    Affects Versions: 0.96.1.1
>            Reporter: Vijay Sarvepali
>            Priority: Minor
>              Labels: importtsv
>         Attachments: HBASE-10385.patch
>
>   Original Estimate: 2h
>  Remaining Estimate: 2h
>
> Simple patch to enable parsing of standard date-time fields from TSV files into HBase.
> ***************
> *** 57,62 ****
> --- 57,70 ----
>   import com.google.common.base.Splitter;
>   import com.google.common.collect.Lists;
>   
> + //2013-08-19T04:39:07
> + import java.text.DateFormat;
> + import java.util.*;
> + import java.text.SimpleDateFormat;
> + import java.text.ParseException;
> + 
> + 
> + 
>   /**
>    * Tool to import data from a TSV file.
>    *
> ***************
> *** 220,229 ****
>               getColumnOffset(timestampKeyColumnIndex),
>               getColumnLength(timestampKeyColumnIndex));
>           try {
> !           return Long.parseLong(timeStampStr);
>           } catch (NumberFormatException nfe) {
>             // treat this record as bad record
> !           throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
>           }
>         }
>         
> --- 228,239 ----
>               getColumnOffset(timestampKeyColumnIndex),
>               getColumnLength(timestampKeyColumnIndex));
>           try {
> ! 	    return Long.parseLong(timeStampStr);
>           } catch (NumberFormatException nfe) {
> + 	    // Try this record with string to date in mseconds long
> + 	    return extractTimestampInput(timeStampStr);
>             // treat this record as bad record
> !           //throw new BadTsvLineException("Invalid timestamp " + timeStampStr);
>           }
>         }
>         
> ***************
> *** 243,248 ****
> --- 253,274 ----
>           return lineBytes;
>         }
>       }
> +  public static long extractTimestampInput(String strDate) throws BadTsvLineException{
> +     final List<String> dateFormats = Arrays.asList("yyyy-MM-dd HH:mm:ss.SSS", "yyyy-MM-dd'T'HH:mm:ss");
> + 
> +     for(String format: dateFormats){
> +         SimpleDateFormat sdf = new SimpleDateFormat(format);
> +         try{
> +             Date d= sdf.parse(strDate);
> + 	    long msecs = d.getTime();
> + 	    return msecs;
> +         } catch (ParseException e) {
> + 	    //intentionally empty
> +         }
> +     }
> +     // If we come here we have a problem with converting timestamps for this row.
> +     throw new BadTsvLineException("Invalid timestamp " + strDate); 
> +  } 
>   
>       public static class BadTsvLineException extends Exception {
>         public BadTsvLineException(String err) {



--
This message was sent by Atlassian JIRA
(v6.1.5#6160)

Mime
View raw message