kudu-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Todd Lipcon <t...@cloudera.com>
Subject Re: scan performance super bad
Date Mon, 14 May 2018 16:57:00 GMT
On Sun, May 13, 2018 at 9:35 PM, 一米阳光 <710339587@qq.com> wrote:

> hi, super thanks for reply.
> the table schema and partitioning info is:
> Schema
> Column ID Type Encoding Compression Read default Write default
> *key* 0 string NOT NULL PREFIX_ENCODING LZ4 - -
> metric_value 1 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_00 2 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_01 3 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_02 4 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_03 5 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_04 6 string NOT NULL AUTO_ENCODING LZ4 - -
> Partition Schema
>
> RANGE (key) (
>     PARTITION VALUES < "005000",
>     PARTITION "005000" <= VALUES < "010000",
>     PARTITION "010000" <= VALUES < "015000",
>     PARTITION "015000" <= VALUES < "020000",
>     PARTITION "020000" <= VALUES < "025000",
>     PARTITION "025000" <= VALUES < "030000",
>     PARTITION "030000" <= VALUES < "035000",
>     PARTITION "035000" <= VALUES < "040000",
>     PARTITION "040000" <= VALUES < "045000",
>     PARTITION "045000" <= VALUES < "050000",
>     PARTITION "050000" <= VALUES < "055000",
>     PARTITION "055000" <= VALUES < "060000",
>     PARTITION "060000" <= VALUES < "065000",
>     PARTITION "065000" <= VALUES < "070000",
>     PARTITION "070000" <= VALUES < "075000",
>     PARTITION "075000" <= VALUES < "080000",
>     PARTITION "080000" <= VALUES < "085000",
>     PARTITION "085000" <= VALUES < "090000",
>     PARTITION "090000" <= VALUES < "095000",
>     PARTITION "095000" <= VALUES < "100000",
>     PARTITION "100000" <= VALUES < "1000000",
>     PARTITION "1000000" <= VALUES < "105000",
>     PARTITION "105000" <= VALUES < "110000",
>     PARTITION "110000" <= VALUES < "115000",
>     PARTITION "115000" <= VALUES < "120000",
>     PARTITION "120000" <= VALUES < "125000",
>     PARTITION "125000" <= VALUES < "130000",
>     PARTITION "130000" <= VALUES < "135000",
>     PARTITION "135000" <= VALUES < "140000",
>     PARTITION "140000" <= VALUES < "145000",
>     PARTITION "145000" <= VALUES < "150000",
>     PARTITION "150000" <= VALUES < "155000",
>     PARTITION "155000" <= VALUES < "160000",
>     PARTITION "160000" <= VALUES < "165000",
>     PARTITION "165000" <= VALUES < "170000",
>     PARTITION "170000" <= VALUES < "175000",
>     PARTITION "175000" <= VALUES < "180000",
>     PARTITION "180000" <= VALUES < "185000",
>     PARTITION "185000" <= VALUES < "190000",
>     PARTITION "190000" <= VALUES < "195000",
>     PARTITION "195000" <= VALUES < "200000",
>     PARTITION "200000" <= VALUES < "205000",
>     PARTITION "205000" <= VALUES < "210000",
>     PARTITION "210000" <= VALUES < "215000",
>     PARTITION "215000" <= VALUES < "220000",
>     PARTITION "220000" <= VALUES < "225000",
>     PARTITION "225000" <= VALUES < "230000",
>     PARTITION "230000" <= VALUES < "235000",
>     PARTITION "235000" <= VALUES < "240000",
>     PARTITION "240000" <= VALUES < "245000",
>     PARTITION "245000" <= VALUES < "250000",
>     PARTITION "250000" <= VALUES < "255000",
>     PARTITION "255000" <= VALUES < "260000",
>     PARTITION "260000" <= VALUES < "265000",
>     PARTITION "265000" <= VALUES < "270000",
>     PARTITION "270000" <= VALUES < "275000",
>     PARTITION "275000" <= VALUES < "280000",
>     PARTITION "280000" <= VALUES < "285000",
>     PARTITION "285000" <= VALUES < "290000",
>     PARTITION "290000" <= VALUES < "295000",
>     PARTITION "295000" <= VALUES < "300000",
>     PARTITION "300000" <= VALUES < "305000",
>     PARTITION "305000" <= VALUES < "310000",
>     PARTITION "310000" <= VALUES < "315000",
>     PARTITION "315000" <= VALUES < "320000",
>     PARTITION "320000" <= VALUES < "325000",
>     PARTITION "325000" <= VALUES < "330000",
>     PARTITION "330000" <= VALUES < "335000",
>     PARTITION "335000" <= VALUES < "340000",
>     PARTITION "340000" <= VALUES < "345000",
>     PARTITION "345000" <= VALUES < "350000",
>     PARTITION "350000" <= VALUES < "355000",
>     PARTITION "355000" <= VALUES < "360000",
>     PARTITION "360000" <= VALUES < "365000",
>     PARTITION "365000" <= VALUES < "370000",
>     PARTITION "370000" <= VALUES < "375000",
>     PARTITION "375000" <= VALUES < "380000",
>     PARTITION "380000" <= VALUES < "385000",
>     PARTITION "385000" <= VALUES < "390000",
>     PARTITION "390000" <= VALUES < "395000",
>     PARTITION "395000" <= VALUES < "400000",
>     PARTITION "400000" <= VALUES < "405000",
>     PARTITION "405000" <= VALUES < "410000",
>     PARTITION "410000" <= VALUES < "415000",
>     PARTITION "415000" <= VALUES < "420000",
>     PARTITION "420000" <= VALUES < "425000",
>     PARTITION "425000" <= VALUES < "430000",
>     PARTITION "430000" <= VALUES < "435000",
>     PARTITION "435000" <= VALUES < "440000",
>     PARTITION "440000" <= VALUES < "445000",
>     PARTITION "445000" <= VALUES < "450000",
>     PARTITION "450000" <= VALUES < "455000",
>     PARTITION "455000" <= VALUES < "460000",
>     PARTITION "460000" <= VALUES < "465000",
>     PARTITION "465000" <= VALUES < "470000",
>     PARTITION "470000" <= VALUES < "475000",
>     PARTITION "475000" <= VALUES < "480000",
>     PARTITION "480000" <= VALUES < "485000",
>     PARTITION "485000" <= VALUES < "490000",
>     PARTITION "490000" <= VALUES < "495000",
>     PARTITION "495000" <= VALUES < "500000",
>     PARTITION "500000" <= VALUES < "505000",
>     PARTITION "505000" <= VALUES < "510000",
>     PARTITION "510000" <= VALUES < "515000",
>     PARTITION "515000" <= VALUES < "520000",
>     PARTITION "520000" <= VALUES < "525000",
>     PARTITION "525000" <= VALUES < "530000",
>     PARTITION "530000" <= VALUES < "535000",
>     PARTITION "535000" <= VALUES < "540000",
>     PARTITION "540000" <= VALUES < "545000",
>     PARTITION "545000" <= VALUES < "550000",
>     PARTITION "550000" <= VALUES < "555000",
>     PARTITION "555000" <= VALUES < "560000",
>     PARTITION "560000" <= VALUES < "565000",
>     PARTITION "565000" <= VALUES < "570000",
>     PARTITION "570000" <= VALUES < "575000",
>     PARTITION "575000" <= VALUES < "580000",
>     PARTITION "580000" <= VALUES < "585000",
>     PARTITION "585000" <= VALUES < "590000",
>     PARTITION "590000" <= VALUES < "595000",
>     PARTITION "595000" <= VALUES < "600000",
>     PARTITION "600000" <= VALUES < "605000",
>     PARTITION "605000" <= VALUES < "610000",
>     PARTITION "610000" <= VALUES < "615000",
>     PARTITION "615000" <= VALUES < "620000",
>     PARTITION "620000" <= VALUES < "625000",
>     PARTITION "625000" <= VALUES < "630000",
>     PARTITION "630000" <= VALUES < "635000",
>     PARTITION "635000" <= VALUES < "640000",
>     PARTITION "640000" <= VALUES < "645000",
>     PARTITION "645000" <= VALUES < "650000",
>     PARTITION "650000" <= VALUES < "655000",
>     PARTITION "655000" <= VALUES < "660000",
>     PARTITION "660000" <= VALUES < "665000",
>     PARTITION "665000" <= VALUES < "670000",
>     PARTITION "670000" <= VALUES < "675000",
>     PARTITION "675000" <= VALUES < "680000",
>     PARTITION "680000" <= VALUES < "685000",
>     PARTITION "685000" <= VALUES < "690000",
>     PARTITION "690000" <= VALUES < "695000",
>     PARTITION "695000" <= VALUES < "700000",
>     PARTITION "700000" <= VALUES < "705000",
>     PARTITION "705000" <= VALUES < "710000",
>     PARTITION "710000" <= VALUES < "715000",
>     PARTITION "715000" <= VALUES < "720000",
>     PARTITION "720000" <= VALUES < "725000",
>     PARTITION "725000" <= VALUES < "730000",
>     PARTITION "730000" <= VALUES < "735000",
>     PARTITION "735000" <= VALUES < "740000",
>     PARTITION "740000" <= VALUES < "745000",
>     PARTITION "745000" <= VALUES < "750000",
>     PARTITION "750000" <= VALUES < "755000",
>     PARTITION "755000" <= VALUES < "760000",
>     PARTITION "760000" <= VALUES < "765000",
>     PARTITION "765000" <= VALUES < "770000",
>     PARTITION "770000" <= VALUES < "775000",
>     PARTITION "775000" <= VALUES < "780000",
>     PARTITION "780000" <= VALUES < "785000",
>     PARTITION "785000" <= VALUES < "790000",
>     PARTITION "790000" <= VALUES < "795000",
>     PARTITION "795000" <= VALUES < "800000",
>     PARTITION "800000" <= VALUES < "805000",
>     PARTITION "805000" <= VALUES < "810000",
>     PARTITION "810000" <= VALUES < "815000",
>     PARTITION "815000" <= VALUES < "820000",
>     PARTITION "820000" <= VALUES < "825000",
>     PARTITION "825000" <= VALUES < "830000",
>     PARTITION "830000" <= VALUES < "835000",
>     PARTITION "835000" <= VALUES < "840000",
>     PARTITION "840000" <= VALUES < "845000",
>     PARTITION "845000" <= VALUES < "850000",
>     PARTITION "850000" <= VALUES < "855000",
>     PARTITION "855000" <= VALUES < "860000",
>     PARTITION "860000" <= VALUES < "865000",
>     PARTITION "865000" <= VALUES < "870000",
>     PARTITION "870000" <= VALUES < "875000",
>     PARTITION "875000" <= VALUES < "880000",
>     PARTITION "880000" <= VALUES < "885000",
>     PARTITION "885000" <= VALUES < "890000",
>     PARTITION "890000" <= VALUES < "895000",
>     PARTITION "895000" <= VALUES < "900000",
>     PARTITION "900000" <= VALUES < "905000",
>     PARTITION "905000" <= VALUES < "910000",
>     PARTITION "910000" <= VALUES < "915000",
>     PARTITION "915000" <= VALUES < "920000",
>     PARTITION "920000" <= VALUES < "925000",
>     PARTITION "925000" <= VALUES < "930000",
>     PARTITION "930000" <= VALUES < "935000",
>     PARTITION "935000" <= VALUES < "940000",
>     PARTITION "940000" <= VALUES < "945000",
>     PARTITION "945000" <= VALUES < "950000",
>     PARTITION "950000" <= VALUES < "955000",
>     PARTITION "955000" <= VALUES < "960000",
>     PARTITION "960000" <= VALUES < "965000",
>     PARTITION "965000" <= VALUES < "970000",
>     PARTITION "970000" <= VALUES < "975000",
>     PARTITION "975000" <= VALUES < "980000",
>     PARTITION "980000" <= VALUES < "985000",
>     PARTITION "985000" <= VALUES < "990000",
>     PARTITION "990000" <= VALUES < "995000",
>     PARTITION VALUES >= "995000"
> )
>
>
>
So it looks like you have a numeric value being stored here in the string
column. Are you sure that you are properly zero-padding when creating your
key? For example if you accidentally scan from "50_..." to "80_..." you
will end up scanning a huge portion of your table.


> i did not delete rows in this table ever.
>
> my scanner code is below:
> buildKey method will build the lower bound and the upper bound, the unique
> id is same, the startRow offset(third part) is 0, and the endRow offset is
> 99999999, startRow and endRow only differs from time.
> though the max offset is big(9999999), generally it is less than 100.
>
> private KuduScanner buildScanner(Metric startRow, Metric endRow, List<Integer>
dimensionIds, List<DimensionFilter> dimensionFilterList) {
>     KuduTable kuduTable = kuduService.getKuduTable(BizConfig.parseFrom(startRow.getBizId()));
>
>     PartialRow lower = kuduTable.getSchema().newPartialRow();
>     lower.addString("key", buildKey(startRow));
>     PartialRow upper = kuduTable.getSchema().newPartialRow();
>     upper.addString("key", buildKey(endRow));
>
>     LOG.info("build scanner. lower = {}, upper = {}", buildKey(startRow), buildKey(endRow));
>
>     KuduScanner.KuduScannerBuilder builder = kuduService.getKuduClient().newScannerBuilder(kuduTable);
>     builder.setProjectedColumnNames(COLUMNS);
>     builder.lowerBound(lower);
>     builder.exclusiveUpperBound(upper);
>     builder.prefetching(true);
>     builder.batchSizeBytes(MAX_BATCH_SIZE);
>
>     if (CollectionUtils.isNotEmpty(dimensionFilterList)) {
>         for (int i = 0; i < dimensionIds.size() && i < MAX_DIMENSION_NUM;
i++) {
>             for (DimensionFilter dimensionFilter : dimensionFilterList) {
>                 if (!Objects.equals(dimensionFilter.getDimensionId(), dimensionIds.get(i)))
{
>                     continue;
>                 }
>                 ColumnSchema columnSchema = kuduTable.getSchema().getColumn(String.format("dimension_%02d",
i));
>                 KuduPredicate predicate = buildKuduPredicate(columnSchema, dimensionFilter);
>                 if (predicate != null) {
>                     builder.addPredicate(predicate);
>                     LOG.info("add predicate. predicate = {}", predicate.toString());
>                 }
>             }
>         }
>     }
>     return builder.build();
> }
>
>
What client version are you using? 1.7.0?


> i checked the metrics, only get content below, it seems no relationship
> with my table.
>

Looks like you got the metrics from the kudu master, not a tablet server.
You need to figure out which tablet server you are scanning and grab the
metrics from that one.

-Todd
-- 
Todd Lipcon
Software Engineer, Cloudera

Mime
View raw message