carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From QiangCai <...@git.apache.org>
Subject [GitHub] incubator-carbondata issue #635: [CARBONDATA-782]support SORT_COLUMNS
Date Thu, 16 Mar 2017 09:55:35 GMT
Github user QiangCai commented on the issue:

    https://github.com/apache/incubator-carbondata/pull/635
  
    Data Records : 1 * 1000 * 1000
    ```
    SORT BY ALL DIMENSION: c1,c2,c3,c4,c5
    
    CREATE TABLE IF NOT EXISTS default.carbon_perftest_table
    (c1 STRING, c2 STRING, c3 STRING, c4 STRING, c5 STRING, c6 INT, c7 INT, c8 INT, c9 INT, c10 INT)
    STORED BY 'org.apache.carbondata.format'
    
    LOAD DATA INPATH '/home/david/Documents/incubator-carbondata/examples/spark/target/store/tempCSV_default_carbon_perftest_table_4113107653051'
    INTO TABLE default.carbon_perftest_table
    OPTIONS ('FILEHEADER' = 'c1,c2,c3,c4,c5,c6,c7,c8,c9,c10', 'USE_KETTLE' = 'false')
    
    load performance: 1782, 3217, 10171
    OLAP Query 0: 736, 1690, 664 [sql: SELECT c3, c4, sum(c8) FROM tableName WHERE c1 = 'P1_23' and c2 = 'P2_43' GROUP BY c3, c4]
    OLAP Query 1: 527, 1874, 542 [sql: SELECT c2, c3, sum(c9) FROM tableName WHERE c1 = 'P1_432' and c4 = 'P4_3' and c5 = 'P5_2' GROUP by c2, c3 ]
    OLAP Query 2: 3088, 3973, 2996 [sql: SELECT c2, count(distinct c1), sum(c8) FROM tableName WHERE c3="P3_4" and c5="P5_4" GROUP BY c2 ]
    OLAP Query 3: 2493, 3710, 2622 [sql: SELECT c2, c5, count(distinct c1), sum(c7) FROM tableName WHERE c4="P4_4" and c5="P5_7" and c8>4 GROUP BY c2, c5 ]
    Point Query 0: 114, 516, 98 [sql: SELECT c4 FROM tableName WHERE c1="P1_43" ]
    Point Query 1: 126, 664, 99 [sql: SELECT c3 FROM tableName WHERE c1="P1_542" and c2="P2_23" ]
    Point Query 2: 128, 817, 165 [sql: SELECT c3, c5 FROM tableName WHERE c1="P1_52" and c7=4]
    Point Query 3: 113, 530, 155 [sql: SELECT c4, c9 FROM tableName WHERE c1="P1_43" and c8<3]
    Filter Query 0: 209, 1319, 154 [sql: SELECT * FROM tableName WHERE c2="P2_43" ]
    Filter Query 1: 283, 1686, 289 [sql: SELECT * FROM tableName WHERE c3="P3_3"  ]
    Filter Query 2: 319, 1306, 137 [sql: SELECT * FROM tableName WHERE c2="P2_32" and c3="P3_23" ]
    Filter Query 3: 234, 1242, 154 [sql: SELECT * FROM tableName WHERE c3="P3_28" and c4="P4_3" ]
    Scan Query 0: 162, 318, 327 [sql: SELECT sum(c7), sum(c8), avg(c9), max(c10) FROM tableName ]
    Scan Query 1: 107, 406, 97 [sql: SELECT sum(c7) FROM tableName WHERE c2="P2_32" ]
    Scan Query 2: 157, 546, 141 [sql: SELECT sum(c7), sum(c8), sum(9), sum(c10) FROM tableName WHERE c4="P4_4" ]
    Scan Query 3: 121, 480, 170 [sql: SELECT sum(c7), sum(c8), sum(9), sum(c10) FROM tableName WHERE c2="P2_75" and c6<5 ]
    Total time: 8924.109771, 21083.679769, 8817.163649
    
    
    SORT_COLUMNS: c1,c3     
    
    
    CREATE TABLE IF NOT EXISTS default.carbon_perftest_table
    (c1 STRING, c2 STRING, c3 STRING, c4 STRING, c5 STRING, c6 INT, c7 INT, c8 INT, c9 INT, c10 INT)
    STORED BY 'org.apache.carbondata.format'
    TBLPROPERTIES('SORT_COLUMNS'='c1,c3')
          
    LOAD DATA INPATH '/home/david/Documents/incubator-carbondata/examples/spark/target/store/tempCSV_default_carbon_perftest_table_4448597034063'
    INTO TABLE default.carbon_perftest_table
    OPTIONS ('FILEHEADER' = 'c1,c2,c3,c4,c5,c6,c7,c8,c9,c10', 'USE_KETTLE' = 'false')
          
    load performance: 1649, 3108, 9070
    OLAP Query 0: 651, 1567, 615 [sql: SELECT c3, c4, sum(c8) FROM tableName WHERE c1 = 'P1_23' and c2 = 'P2_43' GROUP BY c3, c4]
    OLAP Query 1: 502, 1792, 448 [sql: SELECT c2, c3, sum(c9) FROM tableName WHERE c1 = 'P1_432' and c4 = 'P4_3' and c5 = 'P5_2' GROUP by c2, c3 ]
    OLAP Query 2: 3028, 3741, 2600 [sql: SELECT c2, count(distinct c1), sum(c8) FROM tableName WHERE c3="P3_4" and c5="P5_4" GROUP BY c2 ]
    OLAP Query 3: 2535, 3777, 2704 [sql: SELECT c2, c5, count(distinct c1), sum(c7) FROM tableName WHERE c4="P4_4" and c5="P5_7" and c8>4 GROUP BY c2, c5 ]
    Point Query 0: 107, 566, 82 [sql: SELECT c4 FROM tableName WHERE c1="P1_43" ]
    Point Query 1: 158, 681, 96 [sql: SELECT c3 FROM tableName WHERE c1="P1_542" and c2="P2_23" ]
    Point Query 2: 151, 747, 149 [sql: SELECT c3, c5 FROM tableName WHERE c1="P1_52" and c7=4]
    Point Query 3: 128, 530, 141 [sql: SELECT c4, c9 FROM tableName WHERE c1="P1_43" and c8<3]
    Filter Query 0: 212, 1292, 124 [sql: SELECT * FROM tableName WHERE c2="P2_43" ]
    Filter Query 1: 214, 1271, 329 [sql: SELECT * FROM tableName WHERE c3="P3_3"  ]
    Filter Query 2: 203, 1216, 102 [sql: SELECT * FROM tableName WHERE c2="P2_32" and c3="P3_23" ]
    Filter Query 3: 274, 1256, 108 [sql: SELECT * FROM tableName WHERE c3="P3_28" and c4="P4_3" ]
    Scan Query 0: 152, 345, 306 [sql: SELECT sum(c7), sum(c8), avg(c9), max(c10) FROM tableName ]
    Scan Query 1: 133, 344, 86 [sql: SELECT sum(c7) FROM tableName WHERE c2="P2_32" ]
    Scan Query 2: 122, 485, 126 [sql: SELECT sum(c7), sum(c8), sum(9), sum(c10) FROM tableName WHERE c4="P4_4" ]
    Scan Query 3: 141, 451, 168 [sql: SELECT sum(c7), sum(c8), sum(9), sum(c10) FROM tableName WHERE c2="P2_75" and c6<5 ]
    Total time: 8718.807424, 20070.015716, 8191.425242
    ```


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like to enable it, or if the feature is enabled but not
working, please contact infrastructure at infrastructure@apache.org or file
a JIRA ticket with INFRA.
---

Mime
View raw message