hive-user mailing list archives

From: Brian Salazar <brian.sala...@gmail.com>
Subject: Bulk load into HBase
Date: Fri, 04 Feb 2011 22:24:45 GMT
I have been using the Bulk Load example here:
http://wiki.apache.org/hadoop/Hive/HBaseBulkLoad

I am having an issue bulk loading 1 million records into HBase from
Hive on a 6-node cluster.

Hive 0.6.0 (built from source to get UDFRowSequence)
Hadoop 0.20.2
HBase 0.20.6
Zookeeper 3.3.2

Here is my issue:

hive> set mapred.reduce.tasks=1;

hive> create temporary function row_sequence as
    > 'org.apache.hadoop.hive.contrib.udf.UDFRowSequence';
OK
Time taken: 0.0090 seconds

hive> create external table hb_range_keys(uid_range_start string)
    > row format serde
    > 'org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe'
    > stored as
    > inputformat
    > 'org.apache.hadoop.mapred.TextInputFormat'
    > outputformat
    > 'org.apache.hadoop.hive.ql.io.HiveNullValueSequenceFileOutputFormat'
    > location '/tmp/hb_range_keys';
OK
Time taken: 0.176 seconds

hive> insert overwrite table hb_range_keys
    > select uid from
    > (select uid
    > from cdata_dump
    > tablesample(bucket 1 out of 1000 on uid) s
    > order by uid
    > limit 1000) x
    > where (row_sequence() % 100000)=0
    > order by uid
    > limit 9;
Total MapReduce jobs = 3
Launching Job 1 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0036, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0036
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0036
2011-02-04 22:12:25,921 Stage-1 map = 0%,  reduce = 0%
2011-02-04 22:12:28,943 Stage-1 map = 50%,  reduce = 0%
2011-02-04 22:12:31,961 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:12:37,993 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0036
Launching Job 2 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0037, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0037
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0037
2011-02-04 22:12:46,645 Stage-2 map = 0%,  reduce = 0%
2011-02-04 22:12:49,658 Stage-2 map = 100%,  reduce = 0%
2011-02-04 22:12:58,695 Stage-2 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0037
Launching Job 3 out of 3
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0038, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0038
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0038
2011-02-04 22:13:06,542 Stage-3 map = 0%,  reduce = 0%
2011-02-04 22:13:09,555 Stage-3 map = 100%,  reduce = 0%
2011-02-04 22:13:18,592 Stage-3 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0038
Loading data to table hb_range_keys
Deleted hdfs://hadoop-1:54310/tmp/hb_range_keys
Table hb_range_keys stats: [num_partitions: 0, num_files: 0, num_rows:
0, total_size: 0]
OK
Time taken: 61.318 seconds
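
A side note on the output above: the stats line reports num_rows 0
and total_size 0, and rereading my own query, with only 1000 sampled
rows row_sequence() % 100000 would never equal zero, so
/tmp/hb_range_keys may well be empty. A quick way to check (a sketch,
using the same paths as above):

hive> dfs -ls /tmp/hb_range_keys;
hive> dfs -text /tmp/hb_range_keys/*;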

hive> dfs -cp /tmp/hb_range_keys/* /tmp/hb_range_key_list;

hive> dfs -mkdir /tmp/cdata;

hive> set hive.mapred.partitioner=org.apache.hadoop.mapred.lib.TotalOrderPartitioner;

hive> set total.order.partitioner.path=/tmp/hb_range_key_list;
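
One thing I am not sure about at this point: mapred.reduce.tasks is
still 1 from the row_sequence step, and the insert below indeed
reports "Defaulting to jobconf value of: 1". If I read the wiki
right, the final insert needs one reducer per key range, i.e. the
number of range keys plus one, so with the 9 keys I tried to generate
above it would be something like:

hive> set mapred.reduce.tasks=10;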

hive> create table cdata(uid string, retail_cat_name1 string,
retail_cat_name2 string, retail_cat_name3 string, bread_crumb_csv
string)
    > stored as
    > INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat'
    > OUTPUTFORMAT 'org.apache.hadoop.hive.hbase.HiveHFileOutputFormat'
    > TBLPROPERTIES ('hfile.family.path' = '/tmp/cdata/cf');
OK
Time taken: 0.103 seconds
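
For what it is worth, my understanding from the wiki is that the
trailing component of hfile.family.path ('cf' here) has to match the
HBase column family name, that only a single family is supported, and
that every column after the first becomes a qualifier in that family.
After a successful insert the HFiles should appear under that
directory, e.g.:

hive> dfs -ls /tmp/cdata/cf;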

hive> desc cdata_dump;
OK
uid     string
retail_cat_name1        string
retail_cat_name2        string
retail_cat_name3        string
bread_crumb_csv string
Time taken: 4.194 seconds

hive> insert overwrite table cdata
    > select uid, retail_cat_name1, retail_cat_name2,
retail_cat_name3, bread_crumb_csv
    > from cdata_dump
    > cluster by uid;
Total MapReduce jobs = 1
Launching Job 1 out of 1
Number of reduce tasks not specified. Defaulting to jobconf value of: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapred.reduce.tasks=<number>
Starting Job = job_201102040059_0039, Tracking URL =
http://Hadoop-1:50030/jobdetails.jsp?jobid=job_201102040059_0039
Kill Command = /usr/local/hadoop-0.20.2/bin/../bin/hadoop job
-Dmapred.job.tracker=hadoop-1:54311 -kill job_201102040059_0039
2011-02-04 22:14:29,724 Stage-1 map = 0%,  reduce = 0%
2011-02-04 22:14:38,754 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:14:47,787 Stage-1 map = 100%,  reduce = 33%
2011-02-04 22:14:50,799 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:14:59,832 Stage-1 map = 100%,  reduce = 33%
2011-02-04 22:15:02,845 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:15:11,880 Stage-1 map = 100%,  reduce = 17%
2011-02-04 22:15:14,893 Stage-1 map = 100%,  reduce = 0%
2011-02-04 22:15:34,965 Stage-1 map = 100%,  reduce = 100%
Ended Job = job_201102040059_0039 with errors
FAILED: Execution Error, return code 2 from
org.apache.hadoop.hive.ql.exec.MapRedTask

Here is the task log:

2011-02-04 22:14:38,240 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-04 22:14:38,300 INFO org.apache.hadoop.metrics.jvm.JvmMetrics:
Initializing JVM Metrics with processName=SHUFFLE, sessionId=
2011-02-04 22:14:38,406 WARN org.apache.hadoop.conf.Configuration:
/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/job.xml:a
attempt to override final parameter: mapred.hosts.exclude;  Ignoring.
2011-02-04 22:14:38,453 INFO org.apache.hadoop.mapred.ReduceTask:
ShuffleRamManager: MemoryLimit=130514944,
MaxSingleShuffleLimit=32628736
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
merging on-disk files
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
merging in memory files
2011-02-04 22:14:38,461 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:38,463 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Need another 2 map output(s)
where 0 is already in progress
2011-02-04 22:14:38,463 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread started: Thread for
polling Map Completion Events
2011-02-04 22:14:38,464 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Scheduled 0 outputs (0 slow hosts
and0 dup hosts)
2011-02-04 22:14:38,468 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0: Got 1 new map-outputs
2011-02-04 22:14:42,475 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0: Got 1 new map-outputs
2011-02-04 22:14:43,465 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Scheduled 2 outputs (0 slow hosts
and0 dup hosts)
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0039_m_000001_0, compressed len:
36393372, decompressed len: 36393368
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 36393368 bytes (36393372 raw bytes) into Local-FS from
attempt_201102040059_0039_m_000001_0
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
header: attempt_201102040059_0039_m_000000_0, compressed len:
35832828, decompressed len: 35832824
2011-02-04 22:14:43,496 INFO org.apache.hadoop.mapred.ReduceTask:
Shuffling 35832824 bytes (35832828 raw bytes) into Local-FS from
attempt_201102040059_0039_m_000000_0
2011-02-04 22:14:43,748 INFO org.apache.hadoop.mapred.ReduceTask: Read
36393372 bytes from map-output for
attempt_201102040059_0039_m_000001_0
2011-02-04 22:14:43,750 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:43,860 INFO org.apache.hadoop.mapred.ReduceTask: Read
35832828 bytes from map-output for
attempt_201102040059_0039_m_000000_0
2011-02-04 22:14:43,862 INFO org.apache.hadoop.mapred.ReduceTask:
attempt_201102040059_0039_r_000000_0 Thread waiting: Thread for
merging on-disk files
2011-02-04 22:14:44,476 INFO org.apache.hadoop.mapred.ReduceTask:
GetMapEventsThread exiting
2011-02-04 22:14:44,476 INFO org.apache.hadoop.mapred.ReduceTask:
getMapsEventsThread joined.
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
Closed ram manager
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
Interleaved on-disk merge complete: 2 files left.
2011-02-04 22:14:44,477 INFO org.apache.hadoop.mapred.ReduceTask:
In-memory merge complete: 0 files left.
2011-02-04 22:14:44,484 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 2 files, 72226200 bytes from disk
2011-02-04 22:14:44,485 INFO org.apache.hadoop.mapred.ReduceTask:
Merging 0 segments, 0 bytes from memory into reduce
2011-02-04 22:14:44,488 INFO org.apache.hadoop.mapred.Merger: Merging
2 sorted segments
2011-02-04 22:14:44,503 INFO org.apache.hadoop.mapred.Merger: Down to
the last merge-pass, with 2 segments left of total size: 72226192
bytes
2011-02-04 22:14:44,515 INFO ExecReducer: maximum memory = 186449920
2011-02-04 22:14:44,515 INFO ExecReducer: conf classpath =
[file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/]
2011-02-04 22:14:44,516 INFO ExecReducer: thread classpath =
[file:/usr/local/hadoop-0.20.2/conf/,
file:/usr/java/jdk1.6.0_23/lib/tools.jar,
file:/usr/local/hadoop-0.20.2/,
file:/usr/local/hadoop-0.20.2/hadoop-0.20.2-core.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-cli-1.2.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-codec-1.3.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-el-1.0.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-httpclient-3.0.1.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-logging-api-1.0.4.jar,
file:/usr/local/hadoop-0.20.2/lib/commons-net-1.4.1.jar,
file:/usr/local/hadoop-0.20.2/lib/core-3.1.1.jar,
file:/usr/local/hadoop-0.20.2/lib/hbase-0.20.6.jar,
file:/usr/local/hadoop-0.20.2/lib/hsqldb-1.8.0.10.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-compiler-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jasper-runtime-5.5.12.jar,
file:/usr/local/hadoop-0.20.2/lib/jets3t-0.6.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/jetty-util-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/junit-3.8.1.jar,
file:/usr/local/hadoop-0.20.2/lib/kfs-0.2.2.jar,
file:/usr/local/hadoop-0.20.2/lib/log4j-1.2.15.jar,
file:/usr/local/hadoop-0.20.2/lib/mockito-all-1.8.0.jar,
file:/usr/local/hadoop-0.20.2/lib/oro-2.0.8.jar,
file:/usr/local/hadoop-0.20.2/lib/servlet-api-2.5-6.1.14.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-api-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/slf4j-log4j12-1.4.3.jar,
file:/usr/local/hadoop-0.20.2/lib/xmlenc-0.52.jar,
file:/usr/local/hadoop-0.20.2/lib/zookeeper-3.3.2.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-2.1.jar,
file:/usr/local/hadoop-0.20.2/lib/jsp-2.1/jsp-api-2.1.jar,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/work/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/classes,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/jars/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hive_hbase-handler.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/zookeeper-3.3.2.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hbase-0.20.6.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/archive/hadoop-1/mnt/hadoop/tmp/mapred/system/job_201102040059_0039/libjars/hive_contrib.jar/,
file:/mnt/hadoop/tmp/mapred/local/taskTracker/jobcache/job_201102040059_0039/attempt_201102040059_0039_r_000000_0/work/]
2011-02-04 22:14:44,779 INFO ExecReducer:
<OP>Id =3
  <Children>
    <FS>Id =4
      <Parent>Id = 3 null<\Parent>
    <\FS>
  <\Children>
<\OP>
2011-02-04 22:14:44,779 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing Self 3 OP
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Operator 3 OP
initialized
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initializing children
of 3 OP
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing child 4
FS
2011-02-04 22:14:44,785 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initializing Self 4
FS
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Operator 4 FS
initialized
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Initialization Done 4
FS
2011-02-04 22:14:44,812 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: Initialization Done 3
OP
2011-02-04 22:14:44,820 INFO ExecReducer: ExecReducer: processing 1
rows: used memory = 7862736
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.ExtractOperator: 3 forwarding 1 rows
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/000000_0
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: Writing to temp file:
FS hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/_tmp.000000_0
2011-02-04 22:14:44,820 INFO
org.apache.hadoop.hive.ql.exec.FileSinkOperator: New Final Path: FS
hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/000000_0
2011-02-04 22:14:44,952 INFO org.mortbay.log: Logging to
org.slf4j.impl.Log4jLoggerAdapter(org.mortbay.log) via
org.mortbay.log.Slf4jLog
2011-02-04 22:14:44,952 INFO org.mortbay.log:
Writer=hdfs://hadoop-1:54310/tmp/hive-hadoop/hive_2011-02-04_22-14-24_938_3413312759179707816/_tmp.-ext-10000/_tmp.000000_0/_temporary/_attempt__0000_r_000000_0/cf/4554463474860488518
2011-02-04 22:14:44,960 FATAL ExecReducer:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfretail_cat_name1-�ۚ, lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfretail_cat_name1-�ۚ,
lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more

2011-02-04 22:14:44,962 WARN org.apache.hadoop.mapred.TaskTracker:
Error running child
java.lang.RuntimeException:
org.apache.hadoop.hive.ql.metadata.HiveException: Hive Runtime Error
while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:268)
	at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:463)
	at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:411)
	at org.apache.hadoop.mapred.Child.main(Child.java:170)
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException: Hive
Runtime Error while processing row (tag=0)
{"key":{"reducesinkkey0":""},"value":{"_col0":"","_col1":"|","_col2":null,"_col3":"|","_col4":null},"alias":0}
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:256)
	... 3 more
Caused by: org.apache.hadoop.hive.ql.metadata.HiveException:
java.io.IOException: Added a key not lexically larger than previous
key=cfretail_cat_name1-�ۚ, lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:576)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.Operator.forward(Operator.java:744)
	at org.apache.hadoop.hive.ql.exec.ExtractOperator.processOp(ExtractOperator.java:45)
	at org.apache.hadoop.hive.ql.exec.Operator.process(Operator.java:471)
	at org.apache.hadoop.hive.ql.exec.ExecReducer.reduce(ExecReducer.java:247)
	... 3 more
Caused by: java.io.IOException: Added a key not lexically larger than
previous key=cfretail_cat_name1-�ۚ,
lastkey=cfretail_cat_name3-�ۚ
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.checkKey(HFile.java:551)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:513)
	at org.apache.hadoop.hbase.io.hfile.HFile$Writer.append(HFile.java:481)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:95)
	at org.apache.hadoop.hbase.mapreduce.HFileOutputFormat$1.write(HFileOutputFormat.java:65)
	at org.apache.hadoop.hive.hbase.HiveHFileOutputFormat$2.write(HiveHFileOutputFormat.java:200)
	at org.apache.hadoop.hive.ql.exec.FileSinkOperator.processOp(FileSinkOperator.java:561)
	... 8 more
2011-02-04 22:14:44,966 INFO org.apache.hadoop.mapred.TaskRunner:
Runnning cleanup for the task
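
One thing I notice in the failing row above: the reduce key
(reducesinkkey0) is the empty string and most of the columns are
empty or just "|". If many rows share an empty uid, the HFile writer
would see the next row start over at retail_cat_name1 under the same
(empty) row key, i.e. a key not lexically larger than the previous
row's retail_cat_name3, which looks like exactly the error above. A
couple of checks I could run against the source table (a sketch; Hive
0.6 has no HAVING, hence the subquery):

hive> select count(1) from cdata_dump where uid is null or uid = '';

hive> select uid, c
    > from (select uid, count(1) as c from cdata_dump group by uid) t
    > where c > 1
    > limit 10;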


Any ideas?


Thanks in advance!

- Brian
