cassandra-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From tylerho...@apache.org
Subject git commit: Validate ascii and utf8 string literals in cql queries
Date Tue, 14 Oct 2014 17:57:32 GMT
Repository: cassandra
Updated Branches:
  refs/heads/cassandra-2.0 1d02d7b34 -> 781018cb2


Validate ascii and utf8 string literals in cql queries

Patch by Tyler Hobbs; review by Aleksey Yeschenko for CASSANDRA-8101


Project: http://git-wip-us.apache.org/repos/asf/cassandra/repo
Commit: http://git-wip-us.apache.org/repos/asf/cassandra/commit/781018cb
Tree: http://git-wip-us.apache.org/repos/asf/cassandra/tree/781018cb
Diff: http://git-wip-us.apache.org/repos/asf/cassandra/diff/781018cb

Branch: refs/heads/cassandra-2.0
Commit: 781018cb2b8df32291a2ead0d000017a814074ce
Parents: 1d02d7b
Author: Tyler Hobbs <tylerhobbs@apache.org>
Authored: Tue Oct 14 12:56:23 2014 -0500
Committer: Tyler Hobbs <tylerhobbs@apache.org>
Committed: Tue Oct 14 12:57:20 2014 -0500

----------------------------------------------------------------------
 CHANGES.txt                                     |  1 +
 .../apache/cassandra/db/marshal/AsciiType.java  | 27 ++++++++++-
 .../org/apache/cassandra/transport/CBUtil.java  | 51 +++++++++++++++++---
 3 files changed, 71 insertions(+), 8 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/cassandra/blob/781018cb/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index 8b462f1..432750a 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,4 +1,5 @@
 2.0.11:
+ * Properly validate ascii and utf8 string literals in CQL queries (CASSANDRA-8101)
  * (cqlsh) Fix autocompletion for alter keyspace (CASSANDRA-8021)
  * Create backup directories for commitlog archiving during startup (CASSANDRA-8111)
  * Reduce totalBlockFor() for LOCAL_* consistency levels (CASSANDRA-8058)

http://git-wip-us.apache.org/repos/asf/cassandra/blob/781018cb/src/java/org/apache/cassandra/db/marshal/AsciiType.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/db/marshal/AsciiType.java b/src/java/org/apache/cassandra/db/marshal/AsciiType.java
index 74ae6be..fdccfcd 100644
--- a/src/java/org/apache/cassandra/db/marshal/AsciiType.java
+++ b/src/java/org/apache/cassandra/db/marshal/AsciiType.java
@@ -18,8 +18,13 @@
 package org.apache.cassandra.db.marshal;
 
 import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CharacterCodingException;
 
 import org.apache.cassandra.cql3.CQL3Type;
+import org.apache.cassandra.serializers.MarshalException;
 import org.apache.cassandra.serializers.TypeSerializer;
 import org.apache.cassandra.serializers.AsciiSerializer;
 
@@ -29,6 +34,15 @@ public class AsciiType extends AbstractType<String>
 
     AsciiType() {} // singleton
 
+    private final ThreadLocal<CharsetEncoder> encoder = new ThreadLocal<CharsetEncoder>()
+    {
+        @Override
+        protected CharsetEncoder initialValue()
+        {
+            return Charset.forName("US-ASCII").newEncoder();
+        }
+    };
+
     public int compare(ByteBuffer o1, ByteBuffer o2)
     {
         return BytesType.bytesCompare(o1, o2);
@@ -36,7 +50,18 @@ public class AsciiType extends AbstractType<String>
 
     public ByteBuffer fromString(String source)
     {
-        return decompose(source);
+        // the encoder must be reset each time it's used, hence the thread-local storage
+        CharsetEncoder theEncoder = encoder.get();
+        theEncoder.reset();
+
+        try
+        {
+            return theEncoder.encode(CharBuffer.wrap(source));
+        }
+        catch (CharacterCodingException exc)
+        {
+            throw new MarshalException(String.format("Invalid ASCII character in string literal: %s", exc));
+        }
     }
 
     public CQL3Type asCQL3Type()

http://git-wip-us.apache.org/repos/asf/cassandra/blob/781018cb/src/java/org/apache/cassandra/transport/CBUtil.java
----------------------------------------------------------------------
diff --git a/src/java/org/apache/cassandra/transport/CBUtil.java b/src/java/org/apache/cassandra/transport/CBUtil.java
index dfd0439..eb32faa 100644
--- a/src/java/org/apache/cassandra/transport/CBUtil.java
+++ b/src/java/org/apache/cassandra/transport/CBUtil.java
@@ -21,7 +21,11 @@ import java.net.InetAddress;
 import java.net.InetSocketAddress;
 import java.net.UnknownHostException;
 import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
 import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
@@ -36,6 +40,7 @@ import org.jboss.netty.util.CharsetUtil;
 import org.apache.cassandra.db.ConsistencyLevel;
 import org.apache.cassandra.db.TypeSizes;
 import org.apache.cassandra.utils.UUIDGen;
+import org.apache.cassandra.utils.ByteBufferUtil;
 
 /**
  * ChannelBuffer utility methods.
@@ -48,21 +53,30 @@ public abstract class CBUtil
 {
     private CBUtil() {}
 
+    private final static ThreadLocal<CharsetDecoder> decoder = new ThreadLocal<CharsetDecoder>()
+    {
+        @Override
+        protected CharsetDecoder initialValue()
+        {
+            return Charset.forName("UTF-8").newDecoder();
+        }
+    };
+
     private static String readString(ChannelBuffer cb, int length)
     {
+        if (length == 0)
+            return "";
+
+        ByteBuffer buffer = cb.toByteBuffer(cb.readerIndex(), length);
         try
         {
-            String str = cb.toString(cb.readerIndex(), length, CharsetUtil.UTF_8);
+            String str = decodeString(buffer);
             cb.readerIndex(cb.readerIndex() + length);
             return str;
         }
-        catch (IllegalStateException e)
+        catch (IllegalStateException | CharacterCodingException e)
         {
-            // That's the way netty encapsulate a CCE
-            if (e.getCause() instanceof CharacterCodingException)
-                throw new ProtocolException("Cannot decode string as UTF8");
-            else
-                throw e;
+            throw new ProtocolException("Cannot decode string as UTF8: '" + ByteBufferUtil.bytesToHex(buffer) + "'; " + e);
         }
     }
 
@@ -79,6 +93,29 @@ public abstract class CBUtil
         }
     }
 
+    // Taken from Netty's ChannelBuffers.decodeString(). We need to use our own decoder to properly handle invalid
+    // UTF-8 sequences.  See CASSANDRA-8101 for more details.  This can be removed once https://github.com/netty/netty/pull/2999
+    // is resolved in a release used by Cassandra.
+    private static String decodeString(ByteBuffer src) throws CharacterCodingException
+    {
+        // the decoder needs to be reset every time we use it, hence the copy per thread
+        CharsetDecoder theDecoder = decoder.get();
+        theDecoder.reset();
+
+        final CharBuffer dst = CharBuffer.allocate(
+                (int) ((double) src.remaining() * theDecoder.maxCharsPerByte()));
+
+        CoderResult cr = theDecoder.decode(src, dst, true);
+        if (!cr.isUnderflow())
+            cr.throwException();
+
+        cr = theDecoder.flush(dst);
+        if (!cr.isUnderflow())
+            cr.throwException();
+
+        return dst.flip().toString();
+    }
+
     public static void writeString(String str, ChannelBuffer cb)
     {
         byte[] bytes = str.getBytes(CharsetUtil.UTF_8);


Mime
View raw message