hbase-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From st...@apache.org
Subject svn commit: r923381 - in /hadoop/hbase/trunk: CHANGES.txt core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java
Date Mon, 15 Mar 2010 18:36:29 GMT
Author: stack
Date: Mon Mar 15 18:36:29 2010
New Revision: 923381

URL: http://svn.apache.org/viewvc?rev=923381&view=rev
Log:
HBASE-2323 filter.RegexStringComparator does not work with certain bytes

Modified:
    hadoop/hbase/trunk/CHANGES.txt
    hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java

Modified: hadoop/hbase/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/CHANGES.txt?rev=923381&r1=923380&r2=923381&view=diff
==============================================================================
--- hadoop/hbase/trunk/CHANGES.txt (original)
+++ hadoop/hbase/trunk/CHANGES.txt Mon Mar 15 18:36:29 2010
@@ -239,6 +239,8 @@ Release 0.21.0 - Unreleased
    HBASE-2023  Client sync block can cause 1 thread of a multi-threaded client
                to block all others (Karthik Ranganathan via Stack)
    HBASE-2305  Client port for ZK has no default (Suraj Varma via Stack)
+   HBASE-2323  filter.RegexStringComparator does not work with certain bytes
+               (Benoit Sigoure via Stack)
 
   IMPROVEMENTS
    HBASE-1760  Cleanup TODOs in HTable

Modified: hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java
URL: http://svn.apache.org/viewvc/hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java?rev=923381&r1=923380&r2=923381&view=diff
==============================================================================
--- hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java (original)
+++ hadoop/hbase/trunk/core/src/main/java/org/apache/hadoop/hbase/filter/RegexStringComparator.java Mon Mar 15 18:36:29 2010
@@ -19,20 +19,26 @@
  */
 package org.apache.hadoop.hbase.filter;
 
+import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.util.Bytes;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
 import java.util.regex.Pattern;
 
 /**
- * This comparator is for use with {@link CompareFilter} implementations, such 
- * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for 
- * filtering based on the value of a given column. Use it to test if a given 
+ * This comparator is for use with {@link CompareFilter} implementations, such
+ * as {@link RowFilter}, {@link QualifierFilter}, and {@link ValueFilter}, for
+ * filtering based on the value of a given column. Use it to test if a given
  * regular expression matches a cell value in the column.
  * <p>
- * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator. 
+ * Only EQUAL or NOT_EQUAL comparisons are valid with this comparator.
  * <p>
  * For example:
  * <p>
@@ -50,6 +56,10 @@ import java.util.regex.Pattern;
  */
 public class RegexStringComparator extends WritableByteArrayComparable {
 
+  private static final Log LOG = LogFactory.getLog(RegexStringComparator.class);
+
+  private Charset charset = Charset.forName(HConstants.UTF8_ENCODING);
+
   private Pattern pattern;
 
   /** Nullary constructor for Writable, do not use */
@@ -61,26 +71,50 @@ public class RegexStringComparator exten
    */
   public RegexStringComparator(String expr) {
     super(Bytes.toBytes(expr));
-    this.pattern = Pattern.compile(expr);
+    this.pattern = Pattern.compile(expr, Pattern.DOTALL);
+  }
+
+  /**
+   * Specifies the {@link Charset} to use to convert the row key to a String.
+   * <p>
+   * The row key needs to be converted to a String in order to be matched
+   * against the regular expression.  This method controls which charset is
+   * used to do this conversion.
+   * <p>
+   * If the row key is made of arbitrary bytes, the charset {@code ISO-8859-1}
+   * is recommended.
+   * @param charset The charset to use.
+   */
+  public void setCharset(final Charset charset) {
+    this.charset = charset;
   }
 
   @Override
   public int compareTo(byte[] value) {
     // Use find() for subsequence match instead of matches() (full sequence
     // match) to adhere to the principle of least surprise.
-    return pattern.matcher(Bytes.toString(value)).find() ? 0 : 1;
+    return pattern.matcher(new String(value, charset)).find() ? 0 : 1;
   }
 
   @Override
   public void readFields(DataInput in) throws IOException {
-    String expr = in.readUTF();
+    final String expr = in.readUTF();
     this.value = Bytes.toBytes(expr);
     this.pattern = Pattern.compile(expr);
+    final String charset = in.readUTF();
+    if (charset.length() > 0) {
+      try {
+        this.charset = Charset.forName(charset);
+      } catch (IllegalCharsetNameException e) {
+        LOG.error("invalid charset", e);
+      }
+    }
   }
 
   @Override
   public void write(DataOutput out) throws IOException {
     out.writeUTF(pattern.toString());
+    out.writeUTF(charset.name());
   }
 
 }



Mime
View raw message