lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From markrmil...@apache.org
Subject lucene-solr:master: SOLR-9204: Improve performance of getting directory size with hdfs.
Date Mon, 13 Jun 2016 16:23:19 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/master 72914198e -> 08c14f135


SOLR-9204: Improve performance of getting directory size with hdfs.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/08c14f13
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/08c14f13
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/08c14f13

Branch: refs/heads/master
Commit: 08c14f135639beddc0c33c0c087962f8b5f88f33
Parents: 7291419
Author: markrmiller <markrmiller@apache.org>
Authored: Mon Jun 13 12:22:50 2016 -0400
Committer: markrmiller <markrmiller@apache.org>
Committed: Mon Jun 13 12:22:50 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  2 ++
 .../solr/core/CachingDirectoryFactory.java      |  6 +++-
 .../org/apache/solr/core/DirectoryFactory.java  | 25 +++++++++++++++
 .../apache/solr/core/HdfsDirectoryFactory.java  | 33 ++++++++++++++++++++
 .../apache/solr/handler/ReplicationHandler.java |  2 +-
 .../solr/handler/admin/CoreAdminOperation.java  |  2 +-
 .../HdfsWriteToMultipleCollectionsTest.java     | 19 +++++++++++
 7 files changed, 86 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index c886fd0..82b8760 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -254,6 +254,8 @@ Optimizations
 
 * SOLR-8744: Overseer operations performed with fine grained mutual exclusion (noble, Scott Blum)
 
+* SOLR-9204: Improve performance of getting directory size with hdfs. (Mark Miller)
+
 Other Changes
 ----------------------
 * SOLR-8860: Remove back-compat handling of router format made in SOLR-4221 in 4.5.0. (shalin)

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
index 11cf479..5b7ad1b 100644
--- a/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/CachingDirectoryFactory.java
@@ -497,7 +497,7 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
     }
     return livePaths;
   }
-
+  
   @Override
   protected boolean deleteOldIndexDirectory(String oldDirPath) throws IOException {
     Set<String> livePaths = getLivePaths();
@@ -508,4 +508,8 @@ public abstract class CachingDirectoryFactory extends DirectoryFactory {
 
     return super.deleteOldIndexDirectory(oldDirPath);
   }
+  
+  protected synchronized String getPath(Directory directory) {
+    return byDirectoryCache.get(directory).path;
+  }
 }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java
index 8cc9e7d..f953913 100644
--- a/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/DirectoryFactory.java
@@ -146,6 +146,31 @@ public abstract class DirectoryFactory implements NamedListInitializedPlugin,
   public abstract void remove(String path) throws IOException;
   
   /**
+   * @param directory to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  public long size(Directory directory) throws IOException {
+    return sizeOfDirectory(directory);
+  }
+  
+  /**
+   * @param path to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  public long size(String path) throws IOException {
+    Directory dir = get(path, DirContext.DEFAULT, null);
+    long size;
+    try {
+      size = sizeOfDirectory(dir);
+    } finally {
+      release(dir); 
+    }
+    return size;
+  }
+  
+  /**
    * Override for more efficient moves.
    * 
    * Intended for use with replication - use

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
index 25f094e..ada4af3 100644
--- a/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
+++ b/solr/core/src/java/org/apache/solr/core/HdfsDirectoryFactory.java
@@ -31,6 +31,7 @@ import java.util.concurrent.TimeUnit;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FsStatus;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
 import org.apache.hadoop.security.UserGroupInformation;
@@ -410,6 +411,38 @@ public class HdfsDirectoryFactory extends CachingDirectoryFactory implements Sol
         + cd.getDataDir()));
   }
   
+  /**
+   * @param directory to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  @Override
+  public long size(Directory directory) throws IOException {
+    String hdfsDirPath = getPath(directory);
+    return size(hdfsDirPath);
+  }
+  
+  /**
+   * @param path to calculate size of
+   * @return size in bytes
+   * @throws IOException on low level IO error
+   */
+  @Override
+  public long size(String path) throws IOException {
+    Path hdfsDirPath = new Path(path);
+    FileSystem fileSystem = null;
+    try {
+      fileSystem = FileSystem.newInstance(hdfsDirPath.toUri(), getConf());
+      long size = fileSystem.getContentSummary(hdfsDirPath).getLength();
+      return size;
+    } catch (IOException e) {
+      LOG.error("Error checking if hdfs path exists", e);
+      throw new SolrException(ErrorCode.SERVER_ERROR, "Error checking if hdfs path exists", e);
+    } finally {
+      IOUtils.closeQuietly(fileSystem);
+    }
+  }
+  
   public String getConfDir() {
     return confDir;
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
index 3785db7..14898d7 100644
--- a/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
+++ b/solr/core/src/java/org/apache/solr/handler/ReplicationHandler.java
@@ -761,7 +761,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
     try {
       dir = core.getDirectoryFactory().get(core.getIndexDir(), DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
       try {
-        size = DirectoryFactory.sizeOfDirectory(dir);
+        size = core.getDirectoryFactory().size(dir);
       } finally {
         core.getDirectoryFactory().release(dir);
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
----------------------------------------------------------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
index 51e776d..3fdf3ef 100644
--- a/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
+++ b/solr/core/src/java/org/apache/solr/handler/admin/CoreAdminOperation.java
@@ -988,7 +988,7 @@ enum CoreAdminOperation {
       dir = core.getDirectoryFactory().get(core.getIndexDir(), DirectoryFactory.DirContext.DEFAULT, core.getSolrConfig().indexConfig.lockType);
 
       try {
-        size = DirectoryFactory.sizeOfDirectory(dir);
+        size = core.getDirectoryFactory().size(dir);
       } finally {
         core.getDirectoryFactory().release(dir);
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/08c14f13/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
index ddf6e51..043cdec 100644
--- a/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
+++ b/solr/core/src/test/org/apache/solr/cloud/hdfs/HdfsWriteToMultipleCollectionsTest.java
@@ -22,8 +22,13 @@ import java.util.Collection;
 import java.util.List;
 
 import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hdfs.MiniDFSCluster;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.NRTCachingDirectory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase.Nightly;
@@ -130,6 +135,20 @@ public class HdfsWriteToMultipleCollectionsTest extends BasicDistributedZkTest {
         if (core.getCoreDescriptor().getCloudDescriptor().getCollectionName()
             .startsWith(ACOLLECTION)) {
           assertTrue(core.getDirectoryFactory() instanceof HdfsDirectoryFactory);
+          Directory dir = core.getDirectoryFactory().get(core.getDataDir(), null, null);
+          try {
+            long dataDirSize = core.getDirectoryFactory().size(dir);
+            FileSystem fileSystem = null;
+            
+            fileSystem = FileSystem.newInstance(
+                new Path(core.getDataDir()).toUri(), new Configuration());
+            long size = fileSystem.getContentSummary(
+                new Path(core.getDataDir())).getLength();
+            assertEquals(size, dataDirSize);
+          } finally {
+            core.getDirectoryFactory().release(dir);
+          }
+          
           RefCounted<IndexWriter> iwRef = core.getUpdateHandler()
               .getSolrCoreState().getIndexWriter(core);
           try {


Mime
View raw message