lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject [16/36] lucene-solr:jira/http2: LUCENE-8429: Avoid stack overflows in DaciukMihovAutomatonBuilder.
Date Tue, 31 Jul 2018 02:32:32 GMT
LUCENE-8429: Avoid stack overflows in DaciukMihovAutomatonBuilder.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/d78feb22
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/d78feb22
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/d78feb22

Branch: refs/heads/jira/http2
Commit: d78feb22361cef5323793fdff33de621320d7b4b
Parents: 5342048
Author: Adrien Grand <jpountz@gmail.com>
Authored: Fri Jul 27 11:11:00 2018 +0200
Committer: Adrien Grand <jpountz@gmail.com>
Committed: Fri Jul 27 11:11:28 2018 +0200

----------------------------------------------------------------------
 lucene/CHANGES.txt                              |  3 ++
 .../automaton/DaciukMihovAutomatonBuilder.java  | 12 +++++-
 .../TestDaciukMihovAutomatonBuilder.java        | 39 ++++++++++++++++++++
 3 files changed, 53 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78feb22/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 2876fa7..0f13dd3 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -189,6 +189,9 @@ Bug Fixes:
 
 * LUCENE-8398: TieredMergePolicy.getMaxMergedSegmentMB has rounding error (Erick Erickson)
 
+* LUCENE-8429: DaciukMihovAutomatonBuilder is no longer prone to stack
+  overflows by enforcing a maximum term length. (Adrien Grand)
+
 Changes in Runtime Behavior:
 
 * LUCENE-7976: TieredMergePolicy now respects maxSegmentSizeMB by default when executing

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78feb22/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
index 60ec865..3757e82 100644
--- a/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
+++ b/lucene/core/src/java/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java
@@ -33,7 +33,14 @@ import org.apache.lucene.util.UnicodeUtil;
  * @see Automata#makeStringUnion(Collection)
  */
 public final class DaciukMihovAutomatonBuilder {
-  
+
+  /**
+   * This builder rejects terms that are more than 1k chars long since it then
+   * uses recursion based on the length of the string, which might cause stack
+   * overflows.
+   */
+  static final int MAX_TERM_LENGTH = 1_000;
+
   /**
    * The default constructor is private.  Use static methods directly.
    */
@@ -220,6 +227,9 @@ public final class DaciukMihovAutomatonBuilder {
    * to this automaton (the input must be sorted).
    */
   public void add(CharsRef current) {
+    if (current.length > MAX_TERM_LENGTH) {
+      throw new IllegalArgumentException("This builder doesn't allow terms that are larger than 1,000 characters, got " + current);
+    }
     assert stateRegistry != null : "Automaton already built.";
     assert previous == null
         || comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: "

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/d78feb22/lucene/core/src/test/org/apache/lucene/util/automaton/TestDaciukMihovAutomatonBuilder.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/util/automaton/TestDaciukMihovAutomatonBuilder.java b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDaciukMihovAutomatonBuilder.java
new file mode 100644
index 0000000..fa8f154
--- /dev/null
+++ b/lucene/core/src/test/org/apache/lucene/util/automaton/TestDaciukMihovAutomatonBuilder.java
@@ -0,0 +1,39 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.lucene.util.automaton;
+
+import java.util.Arrays;
+import java.util.Collections;
+
+import org.apache.lucene.util.ArrayUtil;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.LuceneTestCase;
+
+public class TestDaciukMihovAutomatonBuilder extends LuceneTestCase {
+
+  public void testLargeTerms() {
+    byte[] b10k = new byte[10_000];
+    Arrays.fill(b10k, (byte) 'a');
+    IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
+        () -> DaciukMihovAutomatonBuilder.build(Collections.singleton(new BytesRef(b10k))));
+    assertTrue(e.getMessage().startsWith("This builder doesn't allow terms that are larger than 1,000 characters"));
+
+    byte[] b1k = ArrayUtil.copyOfSubArray(b10k, 0, 1000);
+    DaciukMihovAutomatonBuilder.build(Collections.singleton(new BytesRef(b1k))); // no exception
+  }
+
+}


Mime
View raw message