From commits-return-16533-archive-asf-public=cust-asf.ponee.io@pinot.apache.org Wed Feb 26 23:51:01 2020 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [207.244.88.153]) by mx-eu-01.ponee.io (Postfix) with SMTP id D9D6418065C for ; Thu, 27 Feb 2020 00:51:00 +0100 (CET) Received: (qmail 6258 invoked by uid 500); 26 Feb 2020 23:51:00 -0000 Mailing-List: contact commits-help@pinot.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@pinot.apache.org Delivered-To: mailing list commits@pinot.apache.org Received: (qmail 6249 invoked by uid 99); 26 Feb 2020 23:51:00 -0000 Received: from ec2-52-202-80-70.compute-1.amazonaws.com (HELO gitbox.apache.org) (52.202.80.70) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 26 Feb 2020 23:51:00 +0000 From: GitBox To: commits@pinot.apache.org Subject: [GitHub] [incubator-pinot] siddharthteotia commented on a change in pull request #5086: Remove the support of the old Star-Tree Message-ID: <158276106023.32011.3467900184864676727.gitbox@gitbox.apache.org> References: In-Reply-To: Date: Wed, 26 Feb 2020 23:51:00 -0000 Content-Type: text/plain; charset=utf-8 Content-Transfer-Encoding: 8bit siddharthteotia commented on a change in pull request #5086: Remove the support of the old Star-Tree URL: https://github.com/apache/incubator-pinot/pull/5086#discussion_r384837002 ########## File path: pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java ########## @@ -26,124 +26,82 @@ public class StringColumnPreIndexStatsCollector extends AbstractColumnStatisticsCollector { - private String min; - private String max; + private final ObjectSet _values = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE); - private int smallestStringLength = Integer.MAX_VALUE; - private int longestStringLength = 0; - private final ObjectSet 
rawStringSet; - private final ObjectSet aggregatedStringSet; - private String[] sortedStringList; - private boolean sealed = false; + private int _minLength = Integer.MAX_VALUE; + private int _maxLength = 0; + private String[] _sortedValues; + private boolean _sealed = false; public StringColumnPreIndexStatsCollector(String column, StatsCollectorConfig statsCollectorConfig) { super(column, statsCollectorConfig); - rawStringSet = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE); - aggregatedStringSet = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE); } - /** - * Collect statistics for the given entry. - * - Add it to the passed in set (which could be raw or aggregated) - * - Update maximum number of values for Multi-valued entries - * - Update Total number of entries - * - Check if entry is sorted. - * @param entry - * @param set - */ - private void collectEntry(Object entry, ObjectSet set) { - + @Override + public void collect(Object entry) { if (entry instanceof Object[]) { - for (final Object e : (Object[]) entry) { - String value = e.toString(); - set.add(value); - - int valueLength = StringUtil.encodeUtf8(value).length; - smallestStringLength = Math.min(smallestStringLength, valueLength); - longestStringLength = Math.max(longestStringLength, valueLength); - } - if (maxNumberOfMultiValues < ((Object[]) entry).length) { - maxNumberOfMultiValues = ((Object[]) entry).length; + Object[] values = (Object[]) entry; + for (Object obj : values) { + String value = (String) obj; + _values.add(value); + + int length = StringUtil.encodeUtf8(value).length; + _minLength = Math.min(_minLength, length); + _maxLength = Math.max(_maxLength, length); } - updateTotalNumberOfEntries((Object[]) entry); - } else { - String value; - if (entry != null) { - value = entry.toString(); - } else { - value = fieldSpec.getDefaultNullValue().toString(); Review comment: Ignore. Got it.... 
this is a STRING column ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: users@infra.apache.org With regards, Apache Git Services --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org For additional commands, e-mail: commits-help@pinot.apache.org