lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From no...@apache.org
Subject [lucene-solr] 01/01: SOLR-14270: implemented and tested
Date Thu, 20 Feb 2020 14:43:36 GMT
This is an automated email from the ASF dual-hosted git repository.

noble pushed a commit to branch jira/solr14270
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git

commit f5aef9deb6cf907b893fc7f5d7e7bee00eb3111c
Author: noble <noble@apache.org>
AuthorDate: Fri Feb 21 01:43:02 2020 +1100

    SOLR-14270: implemented and tested
---
 .../src/java/org/apache/solr/util/ExportTool.java  | 41 ++++++++++++++--------
 1 file changed, 26 insertions(+), 15 deletions(-)

diff --git a/solr/core/src/java/org/apache/solr/util/ExportTool.java b/solr/core/src/java/org/apache/solr/util/ExportTool.java
index ab29800..9ec5a40 100644
--- a/solr/core/src/java/org/apache/solr/util/ExportTool.java
+++ b/solr/core/src/java/org/apache/solr/util/ExportTool.java
@@ -41,6 +41,7 @@ import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.BiConsumer;
 import java.util.function.Consumer;
+import java.util.zip.GZIPOutputStream;
 
 import com.google.common.collect.ImmutableSet;
 import org.apache.commons.cli.CommandLine;
@@ -185,13 +186,23 @@ public class ExportTool extends SolrCLI.ToolBase {
     info.exportDocs();
   }
 
-  interface DocsSink {
-    default void start() throws IOException {
-    }
+  static abstract class DocsSink {
+    Info info;
+    OutputStream fos;
+
+    abstract void start() throws IOException ;
 
-    void accept(SolrDocument document) throws IOException, InterruptedException;
+    void accept(SolrDocument document) throws IOException {
+      long count = info.docsWritten.incrementAndGet();
+
+      if (count % 100000 == 0) {
+        System.out.println("\nDOCS: " + count);
+      }
 
-    default void end() throws IOException {
+
+    }
+
+    void end() throws IOException {
     }
   }
 
@@ -228,13 +239,10 @@ public class ExportTool extends SolrCLI.ToolBase {
           .create("fields")
   };
 
-  static class JsonSink implements DocsSink {
-    private final Info info;
+  static class JsonSink extends DocsSink {
     private CharArr charArr = new CharArr(1024 * 2);
     JSONWriter jsonWriter = new JSONWriter(charArr, -1);
     private Writer writer;
-    private OutputStream fos;
-    public AtomicLong docs = new AtomicLong();
 
     public JsonSink(Info info) {
       this.info = info;
@@ -243,6 +251,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     @Override
     public void start() throws IOException {
       fos = new FileOutputStream(info.out);
+      if(info.out.endsWith(".json.gz") || info.out.endsWith(".json.")) fos = new GZIPOutputStream(fos);
       if (info.bufferSize > 0) {
         fos = new BufferedOutputStream(fos, info.bufferSize);
       }
@@ -259,7 +268,6 @@ public class ExportTool extends SolrCLI.ToolBase {
 
     @Override
     public synchronized void accept(SolrDocument doc) throws IOException {
-      docs.incrementAndGet();
       charArr.reset();
       Map m = new LinkedHashMap(doc.size());
       doc.forEach((s, field) -> {
@@ -274,13 +282,12 @@ public class ExportTool extends SolrCLI.ToolBase {
       jsonWriter.write(m);
       writer.write(charArr.getArray(), charArr.getStart(), charArr.getEnd());
       writer.append('\n');
+      super.accept(doc);
     }
   }
 
-  private static class JavabinSink implements DocsSink {
-    private final Info info;
+  static class JavabinSink extends DocsSink {
     JavaBinCodec codec;
-    OutputStream fos;
 
     public JavabinSink(Info info) {
       this.info = info;
@@ -289,6 +296,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     @Override
     public void start() throws IOException {
       fos = new FileOutputStream(info.out);
+      if(info.out.endsWith(".json.gz") || info.out.endsWith(".json.")) fos = new GZIPOutputStream(fos);
       if (info.bufferSize > 0) {
         fos = new BufferedOutputStream(fos, info.bufferSize);
       }
@@ -330,6 +338,7 @@ public class ExportTool extends SolrCLI.ToolBase {
       codec.writeTag(SOLRINPUTDOC, sz);
       codec.writeFloat(1f); // document boost
       doc.forEach(bic);
+      super.accept(doc);
     }
   }
 
@@ -339,6 +348,7 @@ public class ExportTool extends SolrCLI.ToolBase {
     SolrDocument EOFDOC = new SolrDocument();
     volatile boolean failed = false;
     Map<String, CoreHandler> corehandlers = new HashMap();
+    private long startTime = System.currentTimeMillis();
 
     public MultiThreadedRunner(String url) {
       super(url);
@@ -362,7 +372,7 @@ public class ExportTool extends SolrCLI.ToolBase {
         addConsumer(consumerlatch);
         addProducers(m);
         if (output != null) {
-          output.println("NO of shards : " + corehandlers.size());
+          output.println("NO: of shards : " + corehandlers.size());
         }
         CountDownLatch producerLatch = new CountDownLatch(corehandlers.size());
         corehandlers.forEach((s, coreHandler) -> producerThreadpool.submit(() -> {
@@ -390,6 +400,8 @@ public class ExportTool extends SolrCLI.ToolBase {
             //ignore
           }
         }
+        System.out.println("\nTotal Docs exported: "+ (docsWritten.get() -1)+
+            ". Time taken: "+( (System.currentTimeMillis() - startTime)/1000) + "secs");
       }
     }
 
@@ -418,7 +430,6 @@ public class ExportTool extends SolrCLI.ToolBase {
           try {
             if (docsWritten.get() > limit) continue;
             sink.accept(doc);
-            docsWritten.incrementAndGet();
           } catch (Exception e) {
             if (output != null) output.println("Failed to write to file " + e.getMessage());
             failed = true;


Mime
View raw message