cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "Kevin Wern (JIRA)" <j...@apache.org>
Subject [jira] [Issue Comment Deleted] (CASSANDRA-13848) Allow sstabledump to do a json object per partition to better handle large sstables
Date Tue, 10 Oct 2017 00:15:01 GMT

     [ https://issues.apache.org/jira/browse/CASSANDRA-13848?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Kevin Wern updated CASSANDRA-13848:
-----------------------------------
    Comment: was deleted

(was: From 834cab8a0a67dbbefa608ddd47109bb9883025a2 Mon Sep 17 00:00:00 2001
From: Kevin Wern <kevin.m.wern@gmail.com>
Date: Mon, 9 Oct 2017 04:26:25 -0400
Subject: [PATCH] sstabledump: add -l option for jsonl

---
 .../apache/cassandra/tools/JsonTransformer.java    | 35 +++++++++++++++++-----
 .../org/apache/cassandra/tools/SSTableExport.java  |  8 +++++
 2 files changed, 36 insertions(+), 7 deletions(-)

diff --git a/src/java/org/apache/cassandra/tools/JsonTransformer.java b/src/java/org/apache/cassandra/tools/JsonTransformer.java
index e6aaf07..0c7ed7e 100644
--- a/src/java/org/apache/cassandra/tools/JsonTransformer.java
+++ b/src/java/org/apache/cassandra/tools/JsonTransformer.java
@@ -56,6 +56,7 @@ import org.codehaus.jackson.JsonGenerator;
 import org.codehaus.jackson.impl.Indenter;
 import org.codehaus.jackson.util.DefaultPrettyPrinter.NopIndenter;
 import org.codehaus.jackson.util.DefaultPrettyPrinter;
+import org.codehaus.jackson.util.MinimalPrettyPrinter;
 
 public final class JsonTransformer
 {
@@ -78,17 +79,26 @@ public final class JsonTransformer
 
     private long currentPosition = 0;
 
-    private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime,
TableMetadata metadata)
+    private JsonTransformer(JsonGenerator json, ISSTableScanner currentScanner, boolean rawTime,
TableMetadata metadata, boolean isJsonLines)
     {
         this.json = json;
         this.metadata = metadata;
         this.currentScanner = currentScanner;
         this.rawTime = rawTime;
 
-        DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
-        prettyPrinter.indentObjectsWith(objectIndenter);
-        prettyPrinter.indentArraysWith(arrayIndenter);
-        json.setPrettyPrinter(prettyPrinter);
+        if (isJsonLines)
+        {
+            MinimalPrettyPrinter minimalPrettyPrinter = new MinimalPrettyPrinter();
+            minimalPrettyPrinter.setRootValueSeparator("\n");
+            json.setPrettyPrinter(minimalPrettyPrinter);
+        }
+        else
+        {
+            DefaultPrettyPrinter prettyPrinter = new DefaultPrettyPrinter();
+            prettyPrinter.indentObjectsWith(objectIndenter);
+            prettyPrinter.indentArraysWith(arrayIndenter);
+            json.setPrettyPrinter(prettyPrinter);
+        }
     }
 
     public static void toJson(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator>
partitions, boolean rawTime, TableMetadata metadata, OutputStream out)
@@ -96,18 +106,28 @@ public final class JsonTransformer
     {
         try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out,
StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime,
metadata);
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime,
metadata, false);
             json.writeStartArray();
             partitions.forEach(transformer::serializePartition);
             json.writeEndArray();
         }
     }
 
+    public static void toJsonLines(ISSTableScanner currentScanner, Stream<UnfilteredRowIterator>
partitions, boolean rawTime, TableMetadata metadata, OutputStream out)
+            throws IOException
+    {
+        try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out,
StandardCharsets.UTF_8)))
+        {
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime,
metadata, true);
+            partitions.forEach(transformer::serializePartition);
+        }
+    }
+
     public static void keysToJson(ISSTableScanner currentScanner, Stream<DecoratedKey>
keys, boolean rawTime, TableMetadata metadata, OutputStream out) throws IOException
     {
         try (JsonGenerator json = jsonFactory.createJsonGenerator(new OutputStreamWriter(out,
StandardCharsets.UTF_8)))
         {
-            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime,
metadata);
+            JsonTransformer transformer = new JsonTransformer(json, currentScanner, rawTime,
metadata, false);
             json.writeStartArray();
             keys.forEach(transformer::serializePartitionKey);
             json.writeEndArray();
@@ -221,6 +241,7 @@ public final class JsonTransformer
                 json.writeEndObject();
             }
         }
+
         catch (IOException e)
         {
             String key = metadata.partitionKeyType.getString(partition.partitionKey().getKey());
diff --git a/src/java/org/apache/cassandra/tools/SSTableExport.java b/src/java/org/apache/cassandra/tools/SSTableExport.java
index 95e3ed6..4079ee7 100644
--- a/src/java/org/apache/cassandra/tools/SSTableExport.java
+++ b/src/java/org/apache/cassandra/tools/SSTableExport.java
@@ -62,6 +62,7 @@ public class SSTableExport
     private static final String EXCLUDE_KEY_OPTION = "x";
     private static final String ENUMERATE_KEYS_OPTION = "e";
     private static final String RAW_TIMESTAMPS = "t";
+    private static final String PARTITION_JSON_LINES = "l";
 
     private static final Options options = new Options();
     private static CommandLine cmd;
@@ -88,6 +89,9 @@ public class SSTableExport
 
         Option rawTimestamps = new Option(RAW_TIMESTAMPS, false, "Print raw timestamps instead
of iso8601 date strings");
         options.addOption(rawTimestamps);
+
+        Option partitionJsonLines= new Option(PARTITION_JSON_LINES, false, "Output json lines,
by partition");
+        options.addOption(partitionJsonLines);
     }
 
     /**
@@ -194,6 +198,10 @@ public class SSTableExport
                         });
                     });
                 }
+                else if (cmd.hasOption(PARTITION_JSON_LINES))
+                {
+                    JsonTransformer.toJsonLines(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS),
metadata, System.out);
+                }
                 else
                 {
                     JsonTransformer.toJson(currentScanner, partitions, cmd.hasOption(RAW_TIMESTAMPS),
metadata, System.out);
-- 
2.10.1 (Apple Git-78)
)

> Allow sstabledump to do a json object per partition to better handle large sstables
> -----------------------------------------------------------------------------------
>
>                 Key: CASSANDRA-13848
>                 URL: https://issues.apache.org/jira/browse/CASSANDRA-13848
>             Project: Cassandra
>          Issue Type: New Feature
>          Components: Tools
>            Reporter: Jeff Jirsa
>            Assignee: Kevin Wern
>            Priority: Trivial
>              Labels: lhf
>
> sstable2json / sstabledump make a huge json document of the whole file. For very large
sstables this makes it impossible to load in memory to do anything with it. Allowing users
to break it into small json objects per partition would be useful.



--
This message was sent by Atlassian JIRA
(v6.4.14#64029)

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@cassandra.apache.org
For additional commands, e-mail: commits-help@cassandra.apache.org


Mime
View raw message