Return-Path: X-Original-To: apmail-lucene-commits-archive@www.apache.org Delivered-To: apmail-lucene-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 1EAE010ED5 for ; Mon, 17 Feb 2014 13:19:56 +0000 (UTC) Received: (qmail 66946 invoked by uid 500); 17 Feb 2014 13:19:55 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 66933 invoked by uid 99); 17 Feb 2014 13:19:54 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 17 Feb 2014 13:19:54 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Mon, 17 Feb 2014 13:19:53 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 0185823888E4; Mon, 17 Feb 2014 13:19:33 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1568980 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/test-framework/ lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Date: Mon, 17 Feb 2014 13:19:32 -0000 To: commits@lucene.apache.org From: bimargulies@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140217131933.0185823888E4@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: bimargulies Date: Mon Feb 17 13:19:32 2014 New Revision: 1568980 URL: http://svn.apache.org/r1568980 Log: LUCENE-5448: backport to 4.x. Modified: lucene/dev/branches/branch_4x/ (props changed) lucene/dev/branches/branch_4x/lucene/ (props changed) lucene/dev/branches/branch_4x/lucene/test-framework/ (props changed) lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java?rev=1568980&r1=1568979&r2=1568980&view=diff ============================================================================== --- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java (original) +++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java Mon Feb 17 13:19:32 2014 @@ -598,7 +598,7 @@ public abstract class BaseTokenStreamTes } } else { // synthetic - text = randomAnalysisString(random, maxWordLength, simple); + text = _TestUtil.randomAnalysisString(random, maxWordLength, simple); } try { @@ -876,77 +876,6 @@ public abstract class BaseTokenStreamTes field.setReaderValue(useCharFilter ? new MockCharFilter(reader, remainder) : reader); } } - - private static String randomAnalysisString(Random random, int maxLength, boolean simple) { - assert maxLength >= 0; - - // sometimes just a purely random string - if (random.nextInt(31) == 0) { - return randomSubString(random, random.nextInt(maxLength), simple); - } - - // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer - // first decide how big the string will really be: 0..n - maxLength = random.nextInt(maxLength); - int avgWordLength = _TestUtil.nextInt(random, 3, 8); - StringBuilder sb = new StringBuilder(); - while (sb.length() < maxLength) { - if (sb.length() > 0) { - sb.append(' '); - } - int wordLength = -1; - while (wordLength < 0) { - wordLength = (int) (random.nextGaussian() * 3 + avgWordLength); - } - wordLength = Math.min(wordLength, maxLength - sb.length()); - sb.append(randomSubString(random, wordLength, simple)); - } - return sb.toString(); - } - - private static String randomSubString(Random random, int wordLength, boolean simple) { - if (wordLength == 0) { - return ""; - } - - int evilness = _TestUtil.nextInt(random, 0, 20); - - StringBuilder sb = new StringBuilder(); - while (sb.length() < wordLength) {; - if (simple) { - sb.append(random.nextBoolean() ? _TestUtil.randomSimpleString(random, wordLength) : _TestUtil.randomHtmlishString(random, wordLength)); - } else { - if (evilness < 10) { - sb.append(_TestUtil.randomSimpleString(random, wordLength)); - } else if (evilness < 15) { - assert sb.length() == 0; // we should always get wordLength back! - sb.append(_TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength)); - } else if (evilness == 16) { - sb.append(_TestUtil.randomHtmlishString(random, wordLength)); - } else if (evilness == 17) { - // gives a lot of punctuation - sb.append(_TestUtil.randomRegexpishString(random, wordLength)); - } else { - sb.append(_TestUtil.randomUnicodeString(random, wordLength)); - } - } - } - if (sb.length() > wordLength) { - sb.setLength(wordLength); - if (Character.isHighSurrogate(sb.charAt(wordLength-1))) { - sb.setLength(wordLength-1); - } - } - - if (random.nextInt(17) == 0) { - // mix up case - String mixedUp = _TestUtil.randomlyRecaseCodePoints(random, sb.toString()); - assert mixedUp.length() == sb.length(); - return mixedUp; - } else { - return sb.toString(); - } - } protected String toDot(Analyzer a, String inputText) throws IOException { final StringWriter sw = new StringWriter(); Modified: lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java?rev=1568980&r1=1568979&r2=1568980&view=diff ============================================================================== --- lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java (original) +++ lucene/dev/branches/branch_4x/lucene/test-framework/src/java/org/apache/lucene/util/_TestUtil.java Mon Feb 17 13:19:32 2014 @@ -1044,6 +1044,77 @@ public class _TestUtil { } return out.toString(); } + + public static String randomAnalysisString(Random random, int maxLength, boolean simple) { + assert maxLength >= 0; + + // sometimes just a purely random string + if (random.nextInt(31) == 0) { + return randomSubString(random, random.nextInt(maxLength), simple); + } + + // otherwise, try to make it more realistic with 'words' since most tests use MockTokenizer + // first decide how big the string will really be: 0..n + maxLength = random.nextInt(maxLength); + int avgWordLength = _TestUtil.nextInt(random, 3, 8); + StringBuilder sb = new StringBuilder(); + while (sb.length() < maxLength) { + if (sb.length() > 0) { + sb.append(' '); + } + int wordLength = -1; + while (wordLength < 0) { + wordLength = (int) (random.nextGaussian() * 3 + avgWordLength); + } + wordLength = Math.min(wordLength, maxLength - sb.length()); + sb.append(randomSubString(random, wordLength, simple)); + } + return sb.toString(); + } + + public static String randomSubString(Random random, int wordLength, boolean simple) { + if (wordLength == 0) { + return ""; + } + + int evilness = _TestUtil.nextInt(random, 0, 20); + + StringBuilder sb = new StringBuilder(); + while (sb.length() < wordLength) {; + if (simple) { + sb.append(random.nextBoolean() ? _TestUtil.randomSimpleString(random, wordLength) : _TestUtil.randomHtmlishString(random, wordLength)); + } else { + if (evilness < 10) { + sb.append(_TestUtil.randomSimpleString(random, wordLength)); + } else if (evilness < 15) { + assert sb.length() == 0; // we should always get wordLength back! + sb.append(_TestUtil.randomRealisticUnicodeString(random, wordLength, wordLength)); + } else if (evilness == 16) { + sb.append(_TestUtil.randomHtmlishString(random, wordLength)); + } else if (evilness == 17) { + // gives a lot of punctuation + sb.append(_TestUtil.randomRegexpishString(random, wordLength)); + } else { + sb.append(_TestUtil.randomUnicodeString(random, wordLength)); + } + } + } + if (sb.length() > wordLength) { + sb.setLength(wordLength); + if (Character.isHighSurrogate(sb.charAt(wordLength-1))) { + sb.setLength(wordLength-1); + } + } + + if (random.nextInt(17) == 0) { + // mix up case + String mixedUp = _TestUtil.randomlyRecaseCodePoints(random, sb.toString()); + assert mixedUp.length() == sb.length(); + return mixedUp; + } else { + return sb.toString(); + } + } /** List of characters that match {@link Character#isWhitespace} */ public static final char[] WHITESPACE_CHARACTERS = new char[] {