kudu-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Rotem Gabay <rotemgaba...@gmail.com>
Subject kudu vs parquet
Date Fri, 23 Dec 2016 00:03:00 GMT
Hi,

I have run some performence tests on small scale cluster ( 2 data nodes ,
m4.xlarge aws ec2 machines) .
In order to compare parquet stored table to kudu table  , I have created
the data from  general public police records.
I found out that apart from direct PK access, parquet have outperformed
kudu (on some case on large scale).
I am using KUDU version 1.1.0 .
IS this an expected behaviour ?
Are there any performance guidelines which I should have implemented in
order to balance the outcomes ?
(rule of thumb regarding number of buckets ?)

I would realy appreciate shedding some light.

Regards,
Rotem




[ip-XXX-XX-XX-XX.eu-west-1:21000] > create table hash_range_crimes2_parquet
 STORED AS PARQUET as select * from hash_range_crimes2_raw;


[ip-XXX-XX-XX-XX.eu-west-1:21000] >  CREATE TABLE hash_range_crimes2_kudu (
                                                   >     Crime_ID STRING,
                                                   > Month STRING,
                                                   > Reported_by STRING,
                                                   > Falls_within STRING,
                                                   > Longitude STRING,
                                                   > Latitude STRING,
                                                   > `Location` STRING,
                                                   > LSOA_code STRING,
                                                   > LSOA_name STRING,
                                                   > Crime_type STRING,
                                                   > Last_outcome_category
STRING,
                                                   > Context STRING
                                                   > )
                                                   > DISTRIBUTE BY HASH
(crime_id) INTO 8 BUCKETS
                                                   > TBLPROPERTIES(
                                                   >   'storage_handler' =
'com.cloudera.kudu.hive.KuduStorageHandler',
                                                   >   'kudu.table_name' =
'hash_range_crimes2_kudu',
                                                   >
'kudu.master_addresses' = ' ip-XXX-XX-XX-78.eu-west-1:7051',
                                                   >   'kudu.key_columns' =
'crime_id,month',
                                                   >
 'kudu.num_tablet_replicas' = '2'
                                                   > );




[ip-XXX-XX-XX-XX.eu-west-1:21000] > insert into hash_range_crimes2_kudu
select * from hash_range_crimes2_raw;


[ip-XXX-XX-XX-XX.eu-west-1:21000] > select count(*) from
hash_range_crimes2_parquet;
Query: select count(*) from hash_range_crimes2_parquet
Query submitted at: 2016-12-12 09:39:23 (Coordinator:
http://ip-XXX-XX-XX-XX.eu-west-1:25000)
Query progress can be monitored at:
http://ip-XXX-XX-XX-XX.eu-west-1:25000/query_plan?query_id=6b42a353e6ee0c45:78d0fc7c00000000
+-----------+
| count(*)  |
+-----------+
| 143582880 |
+-----------+
Fetched 1 row(s) in 0.65s
[ip-XXX-XX-XX-XX.eu-west-1:21000] > select count(*) from
hash_range_crimes2_kudu;
Query: select count(*) from hash_range_crimes2_kudu
Query submitted at: 2016-12-12 09:39:27 (Coordinator:
http://ip-XXX-XX-XX-XX.eu-west-1:25000)
Query progress can be monitored at:
http://ip-XXX-XX-XX-XX.eu-west-1:25000/query_plan?query_id=ea48faeb3d2ab999:8be8a73000000000
+-----------+
| count(*)  |
+-----------+
| 143582880 |
+-----------+
Fetched 1 row(s) in 0.87s




[ip-XXX-XX-XX-XX.eu-west-1:21000] > select crime_id,count(*) from
hash_range_crimes2_parquet group by crime_id having count(*) > 1;
Fetched 168704 row(s) in 86.99s

[ip-XXX-XX-XX-XX.eu-west-1:21000] > select crime_id,count(*) from
hash_range_crimes2_kudu group by crime_id having count(*) > 1;
Fetched 168704 row(s) in 93.71s


[ip-XXX-XX-XX-XX.eu-west-1:21000] > select * from
hash_range_crimes2_parquet where crime_id  =
'2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234';
Query: select * from hash_range_crimes2_parquet where crime_id  =
'2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234'
Query submitted at: 2016-12-12 09:59:28 (Coordinator:
http://ip-XXX-XX-XX-XX.eu-west-1:25000)
Query progress can be monitored at:
http://ip-XXX-XX-XX-XX.eu-west-1:25000/query_plan?query_id=164da1054f84cc04:6d9babd200000000
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
| crime_id                                                            |
month   | reported_by                        | falls_within
      | longitude | latitude  | location                       | lsoa_code
| lsoa_name | crime_type                   | last_outcome_category |
context |
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
| 2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234 |
2015-02 | Police Service of Northern Ireland | Police Service of Northern
Ireland | -5.924925 | 54.599348 | On or near Upper Church Lane   |
  |           | Violence and sexual offences |                       |
    |
| 2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234 |
2016-01 | Police Service of Northern Ireland | Police Service of Northern
Ireland | -5.970360 | 54.627343 | On or near Silverstream Parade |
  |           | Violence and sexual offences |                       |
    |
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
Fetched 2 row(s) in 73.36s

[ip-XXX-XX-XX-XX.eu-west-1:21000] > select * from hash_range_crimes2_kudu
where crime_id  =
'2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234';

Query: select * from hash_range_crimes2_kudu where crime_id  =
'2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234'
Query submitted at: 2016-12-12 10:01:13 (Coordinator:
http://ip-XXX-XX-XX-XX.eu-west-1:25000)
Query progress can be monitored at:
http://ip-XXX-XX-XX-XX.eu-west-1:25000/query_plan?query_id=f34f0ca8192ef5c1:30beca2700000000
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
| crime_id                                                            |
month   | reported_by                        | falls_within
      | longitude | latitude  | location                       | lsoa_code
| lsoa_name | crime_type                   | last_outcome_category |
context |
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
| 2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234 |
2015-02 | Police Service of Northern Ireland | Police Service of Northern
Ireland | -5.924925 | 54.599348 | On or near Upper Church Lane   |
  |           | Violence and sexual offences |                       |
    |
| 2489eb2d3157ecc219583d6c307f943c37713c28030e69853312941faede4756234 |
2016-01 | Police Service of Northern Ireland | Police Service of Northern
Ireland | -5.970360 | 54.627343 | On or near Silverstream Parade |
  |           | Violence and sexual offences |                       |
    |
+---------------------------------------------------------------------+---------+------------------------------------+------------------------------------+-----------+-----------+--------------------------------+-----------+-----------+------------------------------+-----------------------+---------+
Fetched 2 row(s) in 1.49s


 [ip-XXX-XX-XX-XX.eu-west-1:21000] >select month,count(*) from
hash_range_crimes2_parquet group by month having count(*) > 1;
 Fetched 29 row(s) in 3.03s


  [ip-XXX-XX-XX-XX.eu-west-1:21000] > select month,count(*) from
hash_range_crimes2_kudu group by month having count(*) > 1;
Fetched 29 row(s) in 17.10s


 [ip-XXX-XX-XX-XX.eu-west-1:21000] > select `location`,count(*) from
hash_range_crimes2_parquet group by `location` ;
Fetched 251776 row(s) in 29.16s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] > select `location`,count(*) from
hash_range_crimes2_kudu group by `location` ;
Fetched 251765 row(s) in 65.67s


 [ip-XXX-XX-XX-XX.eu-west-1:21000] > select reported_by,count(*) from
hash_range_crimes2_parquet group by reported_by having count(*) > 1000000 ;
Fetched 41 row(s) in 4.84s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] > select reported_by,count(*) from
hash_range_crimes2_kudu group by reported_by having count(*) > 1000000 ;
Fetched 41 row(s) in 45.14s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] >SELECT month, COUNT(*) from
hash_range_crimes2_parquet WHERE reported_by = 'Northamptonshire Police'
GROUP BY month;
Fetched 28 row(s) in 1.83s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] > SELECT month, COUNT(*)  from
hash_range_crimes2_kudu WHERE reported_by = 'Northamptonshire Police' GROUP
BY month;
Fetched 28 row(s) in 24.10s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] >SELECT  COUNT(*) from
hash_range_crimes2_parquet WHERE context is not null;
Fetched 1 row(s) in 1.26s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] > SELECT  COUNT(*) from
hash_range_crimes2_kudu WHERE context is not null;
Fetched 1 row(s) in 19.12s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] >SELECT month, COUNT(DISTINCT crime_id)
from hash_range_crimes2_parquet WHERE reported_by = 'Northamptonshire
Police' AND crime_type='Violence and sexual offences' GROUP BY month;

Fetched 28 row(s) in 48.63s

 [ip-XXX-XX-XX-XX.eu-west-1:21000] >SELECT month, COUNT(DISTINCT crime_id)
from hash_range_crimes2_kudu WHERE reported_by = 'Northamptonshire Police'
AND crime_type='Violence and sexual offences' GROUP BY month;

Fetched 28 row(s) in 84.18s

Mime
View raw message