pinot-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [incubator-pinot] siddharthteotia commented on a change in pull request #5086: Remove the support of the old Star-Tree
Date Wed, 26 Feb 2020 23:51:39 GMT
siddharthteotia commented on a change in pull request #5086: Remove the support of the old Star-Tree
URL: https://github.com/apache/incubator-pinot/pull/5086#discussion_r384837002
 
 

 ##########
 File path: pinot-core/src/main/java/org/apache/pinot/core/segment/creator/impl/stats/StringColumnPreIndexStatsCollector.java
 ##########
 @@ -26,124 +26,82 @@
 
 
 public class StringColumnPreIndexStatsCollector extends AbstractColumnStatisticsCollector
{
-  private String min;
-  private String max;
+  private final ObjectSet<String> _values = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE);
 
-  private int smallestStringLength = Integer.MAX_VALUE;
-  private int longestStringLength = 0;
-  private final ObjectSet<String> rawStringSet;
-  private final ObjectSet<String> aggregatedStringSet;
-  private String[] sortedStringList;
-  private boolean sealed = false;
+  private int _minLength = Integer.MAX_VALUE;
+  private int _maxLength = 0;
+  private String[] _sortedValues;
+  private boolean _sealed = false;
 
   public StringColumnPreIndexStatsCollector(String column, StatsCollectorConfig statsCollectorConfig)
{
     super(column, statsCollectorConfig);
-    rawStringSet = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE);
-    aggregatedStringSet = new ObjectOpenHashSet<>(INITIAL_HASH_SET_SIZE);
   }
 
-  /**
-   * Collect statistics for the given entry.
-   * - Add it to the passed in set (which could be raw or aggregated)
-   * - Update maximum number of values for Multi-valued entries
-   * - Update Total number of entries
-   * - Check if entry is sorted.
-   * @param entry
-   * @param set
-   */
-  private void collectEntry(Object entry, ObjectSet<String> set) {
-
+  @Override
+  public void collect(Object entry) {
     if (entry instanceof Object[]) {
-      for (final Object e : (Object[]) entry) {
-        String value = e.toString();
-        set.add(value);
-
-        int valueLength = StringUtil.encodeUtf8(value).length;
-        smallestStringLength = Math.min(smallestStringLength, valueLength);
-        longestStringLength = Math.max(longestStringLength, valueLength);
-      }
-      if (maxNumberOfMultiValues < ((Object[]) entry).length) {
-        maxNumberOfMultiValues = ((Object[]) entry).length;
+      Object[] values = (Object[]) entry;
+      for (Object obj : values) {
+        String value = (String) obj;
+        _values.add(value);
+
+        int length = StringUtil.encodeUtf8(value).length;
+        _minLength = Math.min(_minLength, length);
+        _maxLength = Math.max(_maxLength, length);
       }
-      updateTotalNumberOfEntries((Object[]) entry);
-    } else {
 
-      String value;
-      if (entry != null) {
-        value = entry.toString();
-      } else {
-        value = fieldSpec.getDefaultNullValue().toString();
 
 Review comment:
   Ignore. Got it... this is a STRING column.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


Mime
View raw message