From commits-return-99533-archive-asf-public=cust-asf.ponee.io@lucene.apache.org Thu Mar 15 15:02:37 2018 Return-Path: X-Original-To: archive-asf-public@cust-asf.ponee.io Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by mx-eu-01.ponee.io (Postfix) with SMTP id 5F0DD180649 for ; Thu, 15 Mar 2018 15:02:36 +0100 (CET) Received: (qmail 69192 invoked by uid 500); 15 Mar 2018 14:02:35 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 69183 invoked by uid 99); 15 Mar 2018 14:02:35 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 15 Mar 2018 14:02:35 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 40E25F1810; Thu, 15 Mar 2018 14:02:35 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ishan@apache.org To: commits@lucene.apache.org Message-Id: X-Mailer: ASF-Git Admin Mailer Subject: lucene-solr:branch_7x: SOLR-11920: Differential file copy for IndexFetcher Date: Thu, 15 Mar 2018 14:02:35 +0000 (UTC) Repository: lucene-solr Updated Branches: refs/heads/branch_7x cf8c9cabd -> 3dece5309 SOLR-11920: Differential file copy for IndexFetcher Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3dece530 Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3dece530 Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3dece530 Branch: refs/heads/branch_7x Commit: 3dece53092b866259bd805d29e6c7e3964ed1d7b Parents: cf8c9ca Author: Ishan Chattopadhyaya Authored: Thu Mar 15 19:31:15 2018 +0530 Committer: Ishan Chattopadhyaya Committed: Thu Mar 15 19:32:19 2018 +0530 ---------------------------------------------------------------------- lucene/tools/junit4/solr-tests.policy | 1 + solr/CHANGES.txt | 4 ++ .../org/apache/solr/handler/IndexFetcher.java | 52 ++++++++++++++------ 3 files changed, 42 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/lucene/tools/junit4/solr-tests.policy ---------------------------------------------------------------------- diff --git a/lucene/tools/junit4/solr-tests.policy b/lucene/tools/junit4/solr-tests.policy index 9c8a39f..1c46a78 100644 --- a/lucene/tools/junit4/solr-tests.policy +++ b/lucene/tools/junit4/solr-tests.policy @@ -33,6 +33,7 @@ grant { permission java.io.FilePermission "${junit4.tempDir}${/}*", "read,execute,write,delete"; permission java.io.FilePermission "${clover.db.dir}${/}-", "read,execute,write,delete"; permission java.io.FilePermission "${tests.linedocsfile}", "read"; + permission java.nio.file.LinkPermission "hard"; // all possibilities of accepting/binding connections on localhost with ports >=1024: permission java.net.SocketPermission "localhost:1024-", "accept,listen"; http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/CHANGES.txt ---------------------------------------------------------------------- diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index a581b39..57bcd3b 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -42,6 +42,10 @@ Bug Fixes Optimizations ---------------------- +* SOLR-11920: IndexFetcher now fetches only those files (from master/leader) that are different. This + differential fetching now speeds up recovery times when full index replication is needed, but only + a few segments diverge. (Ishan Chattopadhyaya, Shaun Sabo, John Gallagher) + Other Changes ---------------------- http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3dece530/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java ---------------------------------------------------------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java index 7837bdc..2476a81 100644 --- a/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java +++ b/solr/core/src/java/org/apache/solr/handler/IndexFetcher.java @@ -60,6 +60,8 @@ import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.FilterDirectory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -360,7 +362,7 @@ public class IndexFetcher { boolean successfulInstall = false; markReplicationStart(); Directory tmpIndexDir = null; - String tmpIndex; + String tmpIndexDirPath; Directory indexDir = null; String indexDirPath; boolean deleteTmpIdxDir = true; @@ -496,9 +498,9 @@ public class IndexFetcher { String timestamp = new SimpleDateFormat(SnapShooter.DATE_FMT, Locale.ROOT).format(new Date()); String tmpIdxDirName = "index." + timestamp; - tmpIndex = solrCore.getDataDir() + tmpIdxDirName; + tmpIndexDirPath = solrCore.getDataDir() + tmpIdxDirName; - tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndex, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); + tmpIndexDir = solrCore.getDirectoryFactory().get(tmpIndexDirPath, DirContext.DEFAULT, solrCore.getSolrConfig().indexConfig.lockType); // tmp dir for tlog files if (tlogFilesToDownload != null) { @@ -511,8 +513,9 @@ public class IndexFetcher { try { - //We will compare all the index files from the master vs the index files on disk to see if there is a mismatch - //in the metadata. If there is a mismatch for the same index file then we download the entire index again. + // We will compare all the index files from the master vs the index files on disk to see if there is a mismatch + // in the metadata. If there is a mismatch for the same index file then we download the entire index + // (except when differential copy is applicable) again. if (!isFullCopyNeeded && isIndexStale(indexDir)) { isFullCopyNeeded = true; } @@ -563,7 +566,8 @@ public class IndexFetcher { LOG.info("Starting download (fullCopy={}) to {}", isFullCopyNeeded, tmpIndexDir); successfulInstall = false; - long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, tmpIndexDir, latestGeneration); + long bytesDownloaded = downloadIndexFiles(isFullCopyNeeded, indexDir, + tmpIndexDir, indexDirPath, tmpIndexDirPath, latestGeneration); if (tlogFilesToDownload != null) { bytesDownloaded += downloadTlogFiles(tmpTlogDir, latestGeneration); reloadCore = true; // reload update log @@ -983,18 +987,26 @@ public class IndexFetcher { * Download the index files. If a new index is needed, download all the files. * * @param downloadCompleteIndex is it a fresh index copy - * @param tmpIndexDir the directory to which files need to be downloadeed to * @param indexDir the indexDir to be merged to + * @param tmpIndexDir the directory to which files need to be downloaded to + * @param indexDirPath the path of indexDir * @param latestGeneration the version number * * @return number of bytes downloaded */ - private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, long latestGeneration) + private long downloadIndexFiles(boolean downloadCompleteIndex, Directory indexDir, Directory tmpIndexDir, + String indexDirPath, String tmpIndexDirPath, long latestGeneration) throws Exception { if (LOG.isDebugEnabled()) { LOG.debug("Download files to dir: " + Arrays.asList(indexDir.listAll())); } long bytesDownloaded = 0; + long bytesSkippedCopying = 0; + boolean doDifferentialCopy = (indexDir instanceof FSDirectory || + (indexDir instanceof FilterDirectory && FilterDirectory.unwrap(indexDir) instanceof FSDirectory)) + && (tmpIndexDir instanceof FSDirectory || + (tmpIndexDir instanceof FilterDirectory && FilterDirectory.unwrap(tmpIndexDir) instanceof FSDirectory)); + for (Map file : filesToDownload) { String filename = (String) file.get(NAME); long size = (Long) file.get(SIZE); @@ -1002,17 +1014,27 @@ public class IndexFetcher { boolean alwaysDownload = filesToAlwaysDownloadIfNoChecksums(filename, size, compareResult); LOG.debug("Downloading file={} size={} checksum={} alwaysDownload={}", filename, size, file.get(CHECKSUM), alwaysDownload); if (!compareResult.equal || downloadCompleteIndex || alwaysDownload) { - dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, - (String) file.get(NAME), FILE, latestGeneration); - currentFile = file; - dirFileFetcher.fetchFile(); - bytesDownloaded += dirFileFetcher.getBytesDownloaded(); + if (downloadCompleteIndex && doDifferentialCopy && compareResult.equal && compareResult.checkSummed) { + File localFile = new File(indexDirPath, filename); + LOG.info("Don't need to download this file. Local file's path is: {}, checksum is: {}", + localFile.getAbsolutePath(), file.get(CHECKSUM)); + // A hard link here should survive the eventual directory move, and should be more space efficient as + // compared to a file copy. TODO: Maybe we could do a move safely here? + Files.createLink(new File(tmpIndexDirPath, filename).toPath(), localFile.toPath()); + bytesSkippedCopying += localFile.length(); + } else { + dirFileFetcher = new DirectoryFileFetcher(tmpIndexDir, file, + (String) file.get(NAME), FILE, latestGeneration); + currentFile = file; + dirFileFetcher.fetchFile(); + bytesDownloaded += dirFileFetcher.getBytesDownloaded(); + } filesDownloaded.add(new HashMap<>(file)); } else { - LOG.info("Skipping download for " + file.get(NAME) - + " because it already exists"); + LOG.info("Skipping download for {} because it already exists", file.get(NAME)); } } + LOG.info("Bytes downloaded: {}, Bytes skipped downloading: {}", bytesDownloaded, bytesSkippedCopying); return bytesDownloaded; }