pinot-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [pinot] Jackie-Jiang commented on a change in pull request #7301: mark and sweep indices for V3 segment format
Date Wed, 18 Aug 2021 00:29:52 GMT

Jackie-Jiang commented on a change in pull request #7301:
URL: https://github.com/apache/pinot/pull/7301#discussion_r690801817



##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexKey.java
##########
@@ -46,7 +46,7 @@ public boolean equals(Object o) {
     if (this == o) {
       return true;
     }
-    if (o == null || getClass() != o.getClass()) {
+    if (!(o instanceof IndexKey)) {

Review comment:
       Any specific reason changing this? There are 2 flavors, one allows child class equal
to the parent class, another doesn't. I think we usually perform the class check

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -329,47 +323,67 @@ private void persistIndexMap(IndexEntry entry)
       throws IOException {
     File mapFile = new File(_segmentDirectory, V1Constants.INDEX_MAP_FILE_NAME);
     try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mapFile,
true)))) {
-      String startKey = getKey(entry.key.name, entry.key.type.getIndexName(), true);
-
-      StringBuilder sb = new StringBuilder();
-      sb.append(startKey).append(" = ").append(entry.startOffset);
-      writer.println(sb.toString());
-
-      String endKey = getKey(entry.key.name, entry.key.type.getIndexName(), false);
-      sb = new StringBuilder();
-      sb.append(endKey).append(" = ").append(entry.size);
-      writer.println(sb.toString());
+      persistIndexMap(entry, writer);
     }
   }
 
-  private String getKey(String column, String indexName, boolean isStartOffset) {
-    return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ?
"startOffset" : "size");
-  }
-
   private String allocationContext(IndexKey key) {
     return this.getClass().getSimpleName() + key.toString();
   }
 
+  /**
+   * This method sweeps the indices marked for removal. Exception is simply bubbled up w/o
+   * trying to recover disk states from failure. This method is expected to run during segment
+   * reloading, which has failure handling by creating a backup folder before doing reloading.
+   */
+  private void cleanupRemovedIndices()
+      throws IOException {
+    if (!_shouldCleanupRemovedIndices) {
+      return;
+    }
+
+    File tmpIdxFile = new File(_segmentDirectory, V1Constants.INDEX_FILE_NAME + ".tmp");
+    // Sort indices by column name and index type while copying, so that the
+    // new index_map file is easy to inspect for troubleshooting.
+    List<IndexEntry> retained = copyIndicesTo(_indexFile, new TreeMap<>(_columnEntries),
tmpIdxFile);

Review comment:
       Suggest handling the sorting within the `copyIndices()` so that it doesn't rely on
the caller to sort

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -387,4 +401,53 @@ public boolean isIndexRemovalSupported() {
   public String toString() {
     return _segmentDirectory.toString() + "/" + _indexFile.toString();
   }
+
+  @VisibleForTesting
+  static List<IndexEntry> copyIndicesTo(File indexFile, Map<IndexKey, IndexEntry>
toBeRetained, File tmpIdxFile)
+      throws IOException {
+    // Copy index from original index file and append to temp file.
+    // Keep track of the index entry pointing to the temp index file.
+    List<IndexEntry> retained = new ArrayList<>();
+    long nextOffset = 0;
+    // With FileChannel, we can seek to the data flexibly.
+    try (FileChannel srcCh = new RandomAccessFile(indexFile, "r").getChannel();
+        FileChannel dstCh = new RandomAccessFile(tmpIdxFile, "rw").getChannel()) {
+      for (Map.Entry<IndexKey, IndexEntry> next : toBeRetained.entrySet()) {
+        IndexKey key = next.getKey();
+        IndexEntry idxPos = next.getValue();
+        copyIndexTo(srcCh, idxPos.startOffset, idxPos.size, dstCh);

Review comment:
       I feel inlining it is more readable

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -110,6 +120,7 @@ public SingleFileIndexDirectory(File segmentDirectory, SegmentMetadataImpl
segme
     }
     _columnEntries = new HashMap<>(_segmentMetadata.getAllColumns().size());
     _allocBuffers = new ArrayList<>();
+    _shouldCleanupRemovedIndices = false;

Review comment:
       (nit) no need to explicitly initialize it

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -387,4 +401,53 @@ public boolean isIndexRemovalSupported() {
   public String toString() {
     return _segmentDirectory.toString() + "/" + _indexFile.toString();
   }
+
+  @VisibleForTesting
+  static List<IndexEntry> copyIndicesTo(File indexFile, Map<IndexKey, IndexEntry>
toBeRetained, File tmpIdxFile)

Review comment:
       Also add some javadoc

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexEntry.java
##########
@@ -27,7 +28,7 @@
 class IndexEntry {
   private static Logger LOGGER = LoggerFactory.getLogger(IndexEntry.class);
 
-  IndexKey key;
+  final IndexKey key;

Review comment:
       We can still fix the format by prefixing the underscore and remove `this.`

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/IndexKey.java
##########
@@ -26,11 +26,11 @@
 /**
  * Class representing index name and type
  */
-public class IndexKey {
+public class IndexKey implements Comparable<IndexKey> {
   private static Logger LOGGER = LoggerFactory.getLogger(IndexKey.class);
 
-  String name;
-  ColumnIndexType type;
+  final String name;

Review comment:
       Fix the code style

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/TextIndexUtils.java
##########
@@ -0,0 +1,38 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.segment.store;
+
+import java.io.File;
+import org.apache.commons.io.FileUtils;
+import org.apache.pinot.segment.spi.V1Constants;
+
+
+class TextIndexUtils {

Review comment:
       Add a private constructor

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -387,4 +401,53 @@ public boolean isIndexRemovalSupported() {
   public String toString() {
     return _segmentDirectory.toString() + "/" + _indexFile.toString();
   }
+
+  @VisibleForTesting
+  static List<IndexEntry> copyIndicesTo(File indexFile, Map<IndexKey, IndexEntry>
toBeRetained, File tmpIdxFile)

Review comment:
       Rename the parameters
   ```suggestion
     static List<IndexEntry> copyIndices(File srcFile, File destFile, Map<IndexKey,
IndexEntry> indicesToCopy)
   ```

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -387,4 +401,53 @@ public boolean isIndexRemovalSupported() {
   public String toString() {
     return _segmentDirectory.toString() + "/" + _indexFile.toString();
   }
+
+  @VisibleForTesting
+  static List<IndexEntry> copyIndicesTo(File indexFile, Map<IndexKey, IndexEntry>
toBeRetained, File tmpIdxFile)
+      throws IOException {
+    // Copy index from original index file and append to temp file.
+    // Keep track of the index entry pointing to the temp index file.
+    List<IndexEntry> retained = new ArrayList<>();
+    long nextOffset = 0;
+    // With FileChannel, we can seek to the data flexibly.
+    try (FileChannel srcCh = new RandomAccessFile(indexFile, "r").getChannel();
+        FileChannel dstCh = new RandomAccessFile(tmpIdxFile, "rw").getChannel()) {
+      for (Map.Entry<IndexKey, IndexEntry> next : toBeRetained.entrySet()) {
+        IndexKey key = next.getKey();
+        IndexEntry idxPos = next.getValue();

Review comment:
       Rename the variable?

##########
File path: pinot-segment-local/src/main/java/org/apache/pinot/segment/local/segment/store/SingleFileIndexDirectory.java
##########
@@ -329,47 +323,67 @@ private void persistIndexMap(IndexEntry entry)
       throws IOException {
     File mapFile = new File(_segmentDirectory, V1Constants.INDEX_MAP_FILE_NAME);
     try (PrintWriter writer = new PrintWriter(new BufferedWriter(new FileWriter(mapFile,
true)))) {
-      String startKey = getKey(entry.key.name, entry.key.type.getIndexName(), true);
-
-      StringBuilder sb = new StringBuilder();
-      sb.append(startKey).append(" = ").append(entry.startOffset);
-      writer.println(sb.toString());
-
-      String endKey = getKey(entry.key.name, entry.key.type.getIndexName(), false);
-      sb = new StringBuilder();
-      sb.append(endKey).append(" = ").append(entry.size);
-      writer.println(sb.toString());
+      persistIndexMap(entry, writer);
     }
   }
 
-  private String getKey(String column, String indexName, boolean isStartOffset) {
-    return column + MAP_KEY_SEPARATOR + indexName + MAP_KEY_SEPARATOR + (isStartOffset ?
"startOffset" : "size");
-  }
-
   private String allocationContext(IndexKey key) {
     return this.getClass().getSimpleName() + key.toString();
   }
 
+  /**
+   * This method sweeps the indices marked for removal. Exception is simply bubbled up w/o
+   * trying to recover disk states from failure. This method is expected to run during segment
+   * reloading, which has failure handling by creating a backup folder before doing reloading.
+   */
+  private void cleanupRemovedIndices()
+      throws IOException {
+    if (!_shouldCleanupRemovedIndices) {

Review comment:
       I'd suggest moving this check to the caller side (under the `close()`)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org

For queries about this service, please contact Infrastructure at:
users@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org


Mime
View raw message