From dev-return-73609-archive-asf-public=cust-asf.ponee.io@zookeeper.apache.org Mon Sep 24 18:11:06 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id D1B67180649 for ; Mon, 24 Sep 2018 18:11:05 +0200 (CEST) Received: (qmail 27778 invoked by uid 500); 24 Sep 2018 16:11:04 -0000 Mailing-List: contact dev-help@zookeeper.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@zookeeper.apache.org Delivered-To: mailing list dev@zookeeper.apache.org Received: (qmail 27760 invoked by uid 99); 24 Sep 2018 16:11:03 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 24 Sep 2018 16:11:03 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 8D7F9E0016; Mon, 24 Sep 2018 16:11:03 +0000 (UTC) From: hanm To: dev@zookeeper.apache.org Reply-To: dev@zookeeper.apache.org References: In-Reply-To: Subject: [GitHub] zookeeper pull request #590: [ZOOKEEPER-1177] Add the memory optimized watch... Content-Type: text/plain Message-Id: <20180924161103.8D7F9E0016@git1-us-west.apache.org> Date: Mon, 24 Sep 2018 16:11:03 +0000 (UTC) Github user hanm commented on a diff in the pull request: https://github.com/apache/zookeeper/pull/590#discussion_r219896267 --- Diff: src/java/main/org/apache/zookeeper/server/watch/BitHashSet.java --- @@ -0,0 +1,156 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.zookeeper.server.watch; + +import java.util.BitSet; +import java.util.Set; +import java.util.HashSet; +import java.util.Iterator; +import java.lang.Iterable; + +import org.apache.zookeeper.server.util.BitMap; + +/** + * Using BitSet to store all the elements, and use HashSet to cache limited + * number of elements to find a balance between memory and time complexity. + * + * Without HashSet, we need to use O(N) time to get the elements, N is + * the bit numbers in elementBits. But we need to keep the size small to make + * sure it doesn't cost too much in memory, there is a tradeoff between + * memory and time complexity. + * + * Previously, was deciding to dynamically switch between SparseBitSet and + * HashSet based on the memory consumption, but it will take time to copy + * data over and may have some herd effect of keep copying data from one + * data structure to another. The current solution can do a very good job + * given most of the paths have limited number of elements. + */ +public class BitHashSet implements Iterable { + + static final long serialVersionUID = 6382565447128283568L; + + /** + * Change to SparseBitSet if we want to optimize more, the number of + * elements on a single server is usually limited, so BitSet should be + * fine. 
+ */ + private final BitSet elementBits = new BitSet(); + private final Set cache = new HashSet(); + + private final int cacheSize; + + // To record how many elements in this set. + private int elementCount = 0; + + public BitHashSet() { + this(Integer.getInteger("zookeeper.bitHashCacheSize", 10)); + } + + public BitHashSet(int cacheSize) { + this.cacheSize = cacheSize; + } + + public synchronized boolean add(Integer elementBit) { + if (elementBit == null || elementBits.get(elementBit)) { + return false; + } + if (cache.size() < cacheSize) { + cache.add(elementBit); + } + elementBits.set(elementBit); + elementCount++; + return true; + } + + /** + * Remove the watches, and return the number of watches being removed. + */ + public synchronized int remove(Set bitSet, BitSet bits) { + cache.removeAll(bitSet); + elementBits.andNot(bits); + int elementCountBefore = elementCount; + elementCount = elementBits.cardinality(); + return elementCountBefore - elementCount; + } + + public synchronized boolean remove(Integer elementBit) { + if (elementBit == null || !elementBits.get(elementBit)) { + return false; + } + + cache.remove(elementBit); + elementBits.clear(elementBit); + elementCount--; + return true; + } + + public synchronized boolean contains(Integer elementBit) { + if (elementBit == null) { + return false; + } + return elementBits.get(elementBit); --- End diff -- would be good to add a comment at the declaration of `cache` variable, stating that its purpose is to optimize iteration. ---