lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tom...@apache.org
Subject [lucene-solr] branch branch_8x updated: LUCENE:8945: Allow to change the output file delimiter on Luke "export terms" feature
Date Wed, 18 Sep 2019 11:18:29 GMT
This is an automated email from the ASF dual-hosted git repository.

tomoko pushed a commit to branch branch_8x
in repository https://gitbox.apache.org/repos/asf/lucene-solr.git


The following commit(s) were added to refs/heads/branch_8x by this push:
     new 91f5d2f  LUCENE:8945: Allow to change the output file delimiter on Luke "export terms" feature
91f5d2f is described below

commit 91f5d2ff79572d2999a06339b42303d2a483f37d
Author: Amish Shah <shahamish150294@gmail.com>
AuthorDate: Wed Sep 18 19:53:48 2019 +0900

    LUCENE:8945: Allow to change the output file delimiter on Luke "export terms" feature
    
    Signed-off-by: Tomoko Uchida <tomoko@apache.org>
---
 lucene/CHANGES.txt                                 |  2 +-
 .../app/desktop/components/MenuBarProvider.java    |  2 +-
 .../dialog/menubar/ExportTermsDialogFactory.java   | 54 +++++++++++++++++++++-
 .../lucene/luke/models/tools/IndexTools.java       |  3 +-
 .../lucene/luke/models/tools/IndexToolsImpl.java   |  4 +-
 5 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 9eda7c1..be5e7ea 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -26,7 +26,7 @@ New Features
 
 * LUCENE-8936: Add SpanishMinimalStemFilter (vinod kumar via Tomoko Uchida)
 
-* LUCENE-8764: Add "export all terms" feature to Luke. (Leonardo Menezes via Tomoko Uchida)
+* LUCENE-8764 LUCENE-8945: Add "export all terms and doc freqs" feature to Luke with delimiters. (Leonardo Menezes, Amish Shah via Tomoko Uchida)
 
 * LUCENE-8747: Composite Matches from multiple subqueries now allow access to
   their submatches, and a new NamedMatches API allows marking of subqueries
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
index 3090283..90b2d4f 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/MenuBarProvider.java
@@ -269,7 +269,7 @@ public final class MenuBarProvider {
     }
 
     void showExportTermsDialog(ActionEvent e) {
-      new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 400,
+      new DialogOpener<>(exportTermsDialogFactory).open("Export terms", 600, 450,
           factory -> {
           });
     }
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
index 07fe3cf..4710942 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/app/desktop/components/dialog/menubar/ExportTermsDialogFactory.java
@@ -38,8 +38,10 @@ import java.awt.event.ActionEvent;
 import java.io.File;
 import java.io.IOException;
 import java.lang.invoke.MethodHandles;
+import java.util.Arrays;
 import java.util.concurrent.ExecutorService;
 import java.util.concurrent.Executors;
+import java.util.stream.Stream;
 
 import org.apache.logging.log4j.Logger;
 import org.apache.lucene.luke.app.IndexHandler;
@@ -76,6 +78,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
 
   private final JComboBox<String> fieldCombo = new JComboBox<String>();
 
+  private final JComboBox<String> delimiterCombo = new JComboBox<String>();
+
   private final JTextField destDir = new JTextField();
 
   private final JLabel statusLbl = new JLabel();
@@ -88,6 +92,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
 
   private IndexTools toolsModel;
 
+  private String selectedDelimiter;
+
   public synchronized static ExportTermsDialogFactory getInstance() throws IOException {
     if (instance == null) {
       instance = new ExportTermsDialogFactory();
@@ -99,6 +105,8 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
     this.prefs = PreferencesFactory.getInstance();
     this.indexHandler = IndexHandler.getInstance();
     indexHandler.addObserver(new Observer());
+    Stream.of(Delimiter.values()).forEachOrdered(delimiterVal -> delimiterCombo.addItem(delimiterVal.getDescription()));
+    delimiterCombo.setSelectedItem(Delimiter.COMMA.getDescription());//Set default delimiter
   }
 
   @Override
@@ -120,6 +128,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
     panel.add(currentOpenIndexPanel());
     panel.add(fieldComboPanel());
     panel.add(destinationDirPanel());
+    panel.add(delimiterComboPanel());
     panel.add(statusPanel());
     panel.add(actionButtonsPanel());
 
@@ -138,6 +147,14 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
     return panel;
   }
 
+  private JPanel delimiterComboPanel() {
+    JPanel panel = new JPanel(new GridLayout(2, 1));
+    panel.setOpaque(false);
+    panel.add(new JLabel("Select Delimiter: "));
+    panel.add(delimiterCombo);
+    return panel;
+  }
+
   private JPanel fieldComboPanel() {
     JPanel panel = new JPanel(new GridLayout(2, 1));
     panel.setOpaque(false);
@@ -225,9 +242,11 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
           statusLbl.setText("Exporting...");
           indicatorLbl.setVisible(true);
           String field = (String) fieldCombo.getSelectedItem();
+          selectedDelimiter = Delimiter.getSelectedDelimiterValue((String) delimiterCombo.getSelectedItem());
+
           String directory = destDir.getText();
           try {
-            filename = toolsModel.exportTerms(directory, field);
+            filename = toolsModel.exportTerms(directory, field, selectedDelimiter);
           } catch (LukeException e) {
             log.error("Error while exporting terms from field " + field, e);
             statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.error", e.getMessage()));
@@ -245,7 +264,7 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
         protected void done() {
           indicatorLbl.setVisible(false);
           if (filename != null) {
-            statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term],[doc frequency]"));
+            statusLbl.setText(MessageUtils.getLocalizedMessage("export.terms.label.success", filename, "[term]" + selectedDelimiter + "[doc frequency]"));
           }
         }
       };
@@ -272,4 +291,35 @@ public final class ExportTermsDialogFactory implements DialogOpener.DialogFactor
 
   }
 
+  /**
+   * Delimiters that can be selected
+   */
+  private enum Delimiter {
+    COMMA("Comma", ","), WHITESPACE("Whitespace", " "), TAB("Tab", "\t");
+
+    private final String description;
+    private final String separator;
+
+    private Delimiter(final String description, final String separator) {
+      this.description = description;
+      this.separator = separator;
+    }
+
+    String getDescription() {
+      return this.description;
+    }
+
+    String getSeparator() {
+      return this.separator;
+    }
+
+    static String getSelectedDelimiterValue(String delimiter) {
+      return Arrays.stream(Delimiter.values())
+          .filter(e -> e.description.equals(delimiter))
+          .findFirst()
+          .orElse(COMMA)
+          .getSeparator();
+    }
+  }
+
 }
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
index 72d5384..a4f4d12 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexTools.java
@@ -100,7 +100,8 @@ public interface IndexTools {
    * Export terms from given field into a new file on the destination directory
    * @param destDir - destination directory
    * @param field - field name
+   * @param delimiter - delimiter to separate terms and their frequency
    * @return The file containing the export
    */
-  String exportTerms(String destDir, String field);
+  String exportTerms(String destDir, String field, String delimiter);
 }
diff --git a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
index f4ca89e..4fdd6e3 100644
--- a/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
+++ b/lucene/luke/src/java/org/apache/lucene/luke/models/tools/IndexToolsImpl.java
@@ -193,7 +193,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
     }
   }
 
-  public String exportTerms(String destDir, String field) {
+  public String exportTerms(String destDir, String field, String delimiter) {
     String filename = "terms_" + field + "_" + System.currentTimeMillis() + ".out";
     Path path = Paths.get(destDir, filename);
     try {
@@ -205,7 +205,7 @@ public final class IndexToolsImpl extends LukeModel implements IndexTools {
         TermsEnum termsEnum = terms.iterator();
         BytesRef term;
         while (!Thread.currentThread().isInterrupted() && (term = termsEnum.next()) != null) {
-          writer.write(String.format(Locale.US, "%s,%d\n", term.utf8ToString(), +termsEnum.docFreq()));
+          writer.write(String.format(Locale.US, "%s%s%d\n", term.utf8ToString(), delimiter, +termsEnum.docFreq()));
         }
         return path.toString();
       }


Mime
View raw message