hawq-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From wengyanqing <...@git.apache.org>
Subject [GitHub] incubator-hawq pull request #1354: HAWQ-1606. Implement Deciding to Create B...
Date Mon, 16 Apr 2018 10:44:14 GMT
Github user wengyanqing commented on a diff in the pull request:

    https://github.com/apache/incubator-hawq/pull/1354#discussion_r181689211
  
    --- Diff: src/backend/utils/hash/bloomfilter.c ---
    @@ -0,0 +1,134 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + *
    + *   http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing,
    + * software distributed under the License is distributed on an
    + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    + * KIND, either express or implied.  See the License for the
    + * specific language governing permissions and limitations
    + * under the License.
    + */
    +
    +#include "utils/bloomfilter.h"
    +#include "utils/elog.h"
    +#include "utils/palloc.h"
    +#include "lib/stringinfo.h"
    +#include <assert.h>
    +#include <math.h>
    +
    +uint32_t HASH_SEEDS[8] __attribute__((aligned(32))) = { 0x14EBCDFFU,
    +        0x2A1C1A99U, 0x85CB78FBU, 0x6E8F82DDU, 0xF8464DFFU, 0x1028FEADU,
    +        0x74F04A4DU, 0x1832DB75U };
    +
    +static uint32_t getBucketIdx(uint32_t hash, uint32_t mask)
    +{
    +    /* use Knuth's multiplicative hash */
    +    return ((uint64_t) (hash) * 2654435769ul >> 32) & mask;
    +}
    +
    +/*
    + * Returns the smallest power of two that is greater than or equal to v.
    + */
    +int64_t UpperPowerTwo(int64_t v) {
    +    --v;
    +    v |= v >> 1;
    +    v |= v >> 2;
    +    v |= v >> 4;
    +    v |= v >> 8;
    +    v |= v >> 16;
    +    v |= v >> 32;
    +    ++v;
    +    return v;
    +}
    +
    +/*
    + * Initialize a Bloom filter structure with the memory size of Bloom filter.
    + */
    +BloomFilter InitBloomFilter(int memory_size)
    +{
    +    BloomFilter bf;
    +    uint32_t nBuckets = max(1, memory_size/(sizeof(BucketWord)*NUM_BUCKET_WORDS));
    +    size_t size = nBuckets*NUM_BUCKET_WORDS*sizeof(BucketWord);
    +    bf = palloc0(offsetof(BloomFilterData, data)  + size);
    +    bf->nInserted = bf->nTested = bf->nMatched = 0;
    +    bf->nBuckets = nBuckets;
    +    bf->data_mask = bf->nBuckets - 1;
    +    bf->data_size = size;
    +    bf->isCreated = true;
    +    elog(LOG, "Create a Bloom filter with number of buckets:%d, size:%d",
    --- End diff --
    
    Since the query plan could have multiple hash join nodes, this log message will be printed
    more than once in one query.  Suggest lowering the log level below LOG (e.g. to a DEBUG
    level) or using a GUC to switch it.  The same applies to DestroyBloomFilter.


---

Mime
View raw message