lucenenet-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From synhers...@apache.org
Subject git commit: Mocks for test/Analysis
Date Mon, 07 Apr 2014 22:48:17 GMT
Repository: lucenenet
Updated Branches:
  refs/heads/branch_4x f1fbbd9f1 -> ac6215581


Mocks for test/Analysis


Project: http://git-wip-us.apache.org/repos/asf/lucenenet/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucenenet/commit/ac621558
Tree: http://git-wip-us.apache.org/repos/asf/lucenenet/tree/ac621558
Diff: http://git-wip-us.apache.org/repos/asf/lucenenet/diff/ac621558

Branch: refs/heads/branch_4x
Commit: ac6215581b6cdabd9073644fc49a67be5b88a01e
Parents: f1fbbd9
Author: synhershko <itamar@code972.com>
Authored: Tue Apr 8 01:48:01 2014 +0300
Committer: synhershko <itamar@code972.com>
Committed: Tue Apr 8 01:48:01 2014 +0300

----------------------------------------------------------------------
 src/core/Support/Arrays.cs                      |   5 +
 test/test-framework/Analysis/MockAnalyzer.cs    | 138 +++++++++++++++++++
 .../Analysis/MockFixedLengthPayloadFilter.cs    |  47 +++++++
 test/test-framework/Analysis/MockTokenFilter.cs |  97 +++++++++++++
 .../Analysis/MockVariableLengthPayloadFilter.cs |  43 ++++++
 .../Lucene.Net.TestFramework.csproj             |   3 +
 6 files changed, 333 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/src/core/Support/Arrays.cs
----------------------------------------------------------------------
diff --git a/src/core/Support/Arrays.cs b/src/core/Support/Arrays.cs
index 99c6b24..732f4aa 100644
--- a/src/core/Support/Arrays.cs
+++ b/src/core/Support/Arrays.cs
@@ -113,5 +113,10 @@ namespace Lucene.Net.Support
 
             return hashCode;
         }
+
+        // Java-style Arrays.asList helper: copies the supplied items, in order, into a new List<T>.
+        // The result is an independent copy; later changes to the array are not reflected in it.
+        public static List<T> asList<T>(params T[] objects)
+        {
+            var items = new List<T>(objects);
+            return items;
+        }
     }
 }

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockAnalyzer.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockAnalyzer.cs b/test/test-framework/Analysis/MockAnalyzer.cs
new file mode 100644
index 0000000..62e4a01
--- /dev/null
+++ b/test/test-framework/Analysis/MockAnalyzer.cs
@@ -0,0 +1,138 @@
+using System;
+using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using System.Text;
+using Lucene.Net.Support;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+/**
+ * Analyzer for testing
+ * <p>
+ * This analyzer is a replacement for Whitespace/Simple/KeywordAnalyzers
+ * for unit tests. If you are testing a custom component such as a queryparser
+ * or analyzer-wrapper that consumes analysis streams, it's a great idea to test
+ * it with this analyzer instead. MockAnalyzer has the following behavior:
+ * <ul>
+ *   <li>By default, the assertions in {@link MockTokenizer} are turned on for extra
+ *       checks that the consumer is consuming properly. These checks can be disabled
+ *       with {@link #setEnableChecks(bool)}.
+ *   <li>Payload data is randomly injected into the stream for more thorough testing
+ *       of payloads.
+ * </ul>
+ * @see MockTokenizer
+ */
+public class MockAnalyzer : Analyzer {
+  private readonly CharacterRunAutomaton runAutomaton;
+  private readonly bool lowerCase;
+  private readonly CharacterRunAutomaton filter;
+  private readonly bool enablePositionIncrements;
+  private int positionIncrementGap;
+  private readonly Random random;
+  // Payload mode remembered per field name: -1 = no payloads, int.MaxValue = variable
+  // length payloads, any other value = fixed payload length.
+  private readonly HashMap<String, int> previousMappings = new HashMap<String,int>();
+  // Serializes maybePayload (the Java original declares that method synchronized).
+  private readonly object payloadLock = new object();
+  private bool enableChecks = true;
+  private int maxTokenLength = MockTokenizer.DEFAULT_MAX_TOKEN_LENGTH;
+
+  /**
+   * Creates a new MockAnalyzer.
+   * 
+   * @param random Random for payloads behavior
+   * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
+   * @param lowerCase true if the tokenizer should lowercase terms
+   * @param filter DFA describing how terms should be filtered (set of stopwords, etc)
+   * @param enablePositionIncrements true if position increments should reflect filtered terms.
+   */
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase, CharacterRunAutomaton filter, bool enablePositionIncrements)
+    : base(new PerFieldReuseStrategy()) {
+    // TODO: this should be solved in a different way; Random should not be shared (!).
+    // System.Random only accepts an int seed, so the private generator is seeded with
+    // the next int drawn from the caller's generator.
+    this.random = new Random(random.Next());
+    this.runAutomaton = runAutomaton;
+    this.lowerCase = lowerCase;
+    this.filter = filter;
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
+  /**
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, bool, CharacterRunAutomaton, bool)
+   * MockAnalyzer(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true)}.
+   */
+  public MockAnalyzer(Random random, CharacterRunAutomaton runAutomaton, bool lowerCase)
+    : this(random, runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, true) {
+  }
+
+  /** 
+   * Create a Whitespace-lowercasing analyzer with no stopwords removal.
+   * <p>
+   * Calls {@link #MockAnalyzer(Random, CharacterRunAutomaton, bool, CharacterRunAutomaton, bool)
+   * MockAnalyzer(random, MockTokenizer.WHITESPACE, true, MockTokenFilter.EMPTY_STOPSET, true)}.
+   */
+  public MockAnalyzer(Random random)
+    : this(random, MockTokenizer.WHITESPACE, true) {
+  }
+
+  /**
+   * Builds the analysis chain for a field: a {@link MockTokenizer} configured from this
+   * analyzer's settings, wrapped in a {@link MockTokenFilter}, optionally wrapped in a
+   * payload filter (see maybePayload).
+   *
+   * @param fieldName name of the field being analyzed; selects the payload mode
+   * @param reader source of the text to tokenize
+   */
+  public override TokenStreamComponents CreateComponents(String fieldName, TextReader reader) {
+    MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase, maxTokenLength);
+    tokenizer.setEnableChecks(enableChecks);
+    MockTokenFilter filt = new MockTokenFilter(tokenizer, filter);
+    filt.setEnablePositionIncrements(enablePositionIncrements);
+    return new TokenStreamComponents(tokenizer, maybePayload(filt, fieldName));
+  }
+  
+  /**
+   * Wraps the stream in a payload filter according to the payload mode of the field.
+   * The mode is picked the first time a field name is seen and stored in
+   * previousMappings, so later calls for the same field behave consistently.
+   *
+   * @param stream the filter chain built so far
+   * @param fieldName field whose payload mode is looked up (or chosen)
+   * @return stream itself when the field gets no payloads, otherwise a
+   *         variable-length or fixed-length payload filter around it
+   */
+  private TokenFilter maybePayload(TokenFilter stream, String fieldName) {
+    lock (payloadLock) {
+      int val;
+      // TryGetValue distinguishes "field not seen yet" from a stored value; an int can
+      // never compare equal to null, so a null check cannot do that here.
+      // NOTE(review): assumes HashMap exposes the IDictionary members TryGetValue and
+      // the indexer setter -- confirm against Lucene.Net.Support.HashMap.
+      if (!previousMappings.TryGetValue(fieldName, out val)) {
+        val = -1; // no payloads
+        if (LuceneTestCase.rarely(random)) {
+          switch (random.Next(3)) {
+            case 0: val = -1; // no payloads
+                    break;
+            case 1: val = int.MaxValue; // variable length payload
+                    break;
+            case 2: val = random.Next(0, 12); // fixed length payload, 0..11 bytes
+                    break;
+          }
+        }
+        if (LuceneTestCase.VERBOSE) {
+          if (val == int.MaxValue) {
+            Console.WriteLine("MockAnalyzer: field=" + fieldName + " gets variable length payloads");
+          } else if (val != -1) {
+            Console.WriteLine("MockAnalyzer: field=" + fieldName + " gets fixed length=" + val + " payloads");
+          }
+        }
+        previousMappings[fieldName] = val; // save it so we are consistent for this field
+      }
+
+      if (val == -1) {
+        return stream;
+      } else if (val == int.MaxValue) {
+        return new MockVariableLengthPayloadFilter(random, stream);
+      } else {
+        return new MockFixedLengthPayloadFilter(random, stream, val);
+      }
+    }
+  }
+  
+  /**
+   * Sets the value that {@link #GetPositionIncrementGap(String)} returns for every field.
+   */
+  public void SetPositionIncrementGap(int positionIncrementGap){
+    this.positionIncrementGap = positionIncrementGap;
+  }
+  
+  /**
+   * Returns the configured position increment gap; the field name is not consulted.
+   */
+  public override int GetPositionIncrementGap(String fieldName){
+    return positionIncrementGap;
+  }
+  
+  /** 
+   * Toggle consumer workflow checking: if your test consumes tokenstreams normally you
+   * should leave this enabled.
+   */
+  public void setEnableChecks(bool enableChecks) {
+    this.enableChecks = enableChecks;
+  }
+  
+  /** 
+   * Toggle maxTokenLength for MockTokenizer
+   */
+  public void setMaxTokenLength(int length) {
+    this.maxTokenLength = length;
+  }
+}
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs b/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
new file mode 100644
index 0000000..c6e7069
--- /dev/null
+++ b/test/test-framework/Analysis/MockFixedLengthPayloadFilter.cs
@@ -0,0 +1,47 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+    /**
+     * Test-only TokenFilter: every token that passes through receives a payload
+     * filled with random bytes, always of the length given at construction.
+     */
+    public class MockFixedLengthPayloadFilter : TokenFilter
+    {
+        private readonly PayloadAttribute payloadAtt;
+        private Random random;
+        private sbyte[] bytes;
+        private BytesRef payload;
+
+        /**
+         * @param random source of the payload bytes
+         * @param ts stream whose tokens receive payloads
+         * @param length payload size in bytes; must not be negative
+         */
+        public MockFixedLengthPayloadFilter(Random random, TokenStream ts, int length)
+            : base(ts)
+        {
+            if (length < 0)
+            {
+                throw new ArgumentException("length must be >= 0");
+            }
+
+            this.random = random;
+            bytes = new sbyte[length];
+            payload = new BytesRef(bytes);
+
+            payloadAtt = AddAttribute<PayloadAttribute>();
+        }
+
+        /**
+         * Advances the wrapped stream; on success refills the payload buffer with
+         * fresh random bytes and attaches the payload to the current token.
+         */
+        public override bool IncrementToken()
+        {
+            if (!input.IncrementToken())
+            {
+                return false;
+            }
+
+            // Random.NextBytes only fills byte[]; the raw bytes are then block-copied
+            // into the sbyte[] that was handed to the BytesRef.
+            var scratch = new byte[bytes.Length];
+            random.NextBytes(scratch);
+            Buffer.BlockCopy(scratch, 0, bytes, 0, scratch.Length);
+            payloadAtt.Payload = payload;
+            return true;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockTokenFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockTokenFilter.cs b/test/test-framework/Analysis/MockTokenFilter.cs
new file mode 100644
index 0000000..0e39e45
--- /dev/null
+++ b/test/test-framework/Analysis/MockTokenFilter.cs
@@ -0,0 +1,97 @@
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Support;
+using Lucene.Net.Util.Automaton;
+
+namespace Lucene.Net.Analysis
+{
+/**
+ * A tokenfilter for testing that removes terms accepted by a DFA.
+ * <ul>
+ *  <li>Union a list of singletons to act like a stopfilter.
+ *  <li>Use the complement to act like a keepwordfilter
+ *  <li>Use a regex like <code>.{12,}</code> to act like a lengthfilter
+ * </ul>
+ */
+    public class MockTokenFilter : TokenFilter
+    {
+        /** Empty set of stopwords */
+        public static readonly CharacterRunAutomaton EMPTY_STOPSET =
+            new CharacterRunAutomaton(BasicAutomata.MakeEmpty());
+
+        /** Set of common english stopwords */
+        public static readonly CharacterRunAutomaton ENGLISH_STOPSET =
+            new CharacterRunAutomaton(BasicOperations.Union(Arrays.asList<Automaton>(
+                makeString("a"), makeString("an"), makeString("and"), makeString("are"),
+                makeString("as"), makeString("at"), makeString("be"), makeString("but"),
+                makeString("by"), makeString("for"), makeString("if"), makeString("in"),
+                makeString("into"), makeString("is"), makeString("it"), makeString("no"),
+                makeString("not"), makeString("of"), makeString("on"), makeString("or"),
+                makeString("such"), makeString("that"), makeString("the"), makeString("their"),
+                makeString("then"), makeString("there"), makeString("these"), makeString("they"),
+                makeString("this"), makeString("to"), makeString("was"), makeString("will"),
+                makeString("with"))));
+
+        /** Shorthand for BasicAutomata.MakeString, used to build ENGLISH_STOPSET. */
+        private static Automaton makeString(string an)
+        {
+            return BasicAutomata.MakeString(an);
+        }
+
+        private readonly CharacterRunAutomaton filter;
+        private bool enablePositionIncrements = true;
+
+        private readonly CharTermAttribute termAtt;
+        private readonly PositionIncrementAttribute posIncrAtt;
+
+        /**
+         * @param input stream to filter
+         * @param filter automaton run against each term; terms it accepts are removed
+         */
+        public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter) : base(input)
+        {
+            this.filter = filter;
+            termAtt = AddAttribute<CharTermAttribute>();
+            posIncrAtt = AddAttribute<PositionIncrementAttribute>();
+        }
+
+        /**
+         * Advances to the next token the automaton does not accept. The position
+         * increments of the tokens skipped on the way are summed and, when position
+         * increments are enabled, added to the increment of the token that is returned.
+         *
+         * @return true if such a token was found, false once the input is exhausted
+         */
+        public override bool IncrementToken()
+        {
+            // TODO: fix me when posInc=false, to work like FilteringTokenFilter in that case and not return
+            // initial token with posInc=0 ever
+
+            // return the first non-stop word found
+            int skippedPositions = 0;
+            while (input.IncrementToken())
+            {
+                if (!filter.Run(termAtt.Buffer, 0, termAtt.Length))
+                {
+                    if (enablePositionIncrements)
+                    {
+                        posIncrAtt.PositionIncrement = posIncrAtt.PositionIncrement + skippedPositions;
+                    }
+                    return true;
+                }
+                skippedPositions += posIncrAtt.PositionIncrement;
+            }
+            // reached EOS -- return false
+            return false;
+        }
+
+        /**
+         * @see #setEnablePositionIncrements(bool)
+         */
+        public bool getEnablePositionIncrements()
+        {
+            return enablePositionIncrements;
+        }
+
+        /**
+         * If <code>true</code>, this Filter will preserve
+         * positions of the incoming tokens (ie, accumulate and
+         * set position increments of the removed stop tokens).
+         */
+        public void setEnablePositionIncrements(bool enable)
+        {
+            this.enablePositionIncrements = enable;
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
----------------------------------------------------------------------
diff --git a/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs b/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
new file mode 100644
index 0000000..fb1730d
--- /dev/null
+++ b/test/test-framework/Analysis/MockVariableLengthPayloadFilter.cs
@@ -0,0 +1,43 @@
+using System;
+using Lucene.Net.Analysis.Tokenattributes;
+using Lucene.Net.Util;
+
+namespace Lucene.Net.Analysis
+{
+    /**
+     * TokenFilter that adds random variable-length payloads.
+     */
+    public class MockVariableLengthPayloadFilter : TokenFilter
+    {
+        // Size of the backing buffer; payload lengths are drawn from 0 .. MAXLENGTH - 1.
+        private const int MAXLENGTH = 129;
+
+        private readonly PayloadAttribute payloadAtt;
+        private readonly Random random;
+        private readonly sbyte[] bytes = new sbyte[MAXLENGTH];
+        private readonly BytesRef payload;
+
+        /**
+         * @param random source of the payload bytes and payload lengths
+         * @param ts stream whose tokens receive payloads
+         */
+        public MockVariableLengthPayloadFilter(Random random, TokenStream ts)
+            : base(ts)
+        {
+            this.random = random;
+            this.payload = new BytesRef(bytes);
+
+            // Must be registered here: IncrementToken dereferences payloadAtt on every
+            // token, and a readonly field can only be assigned during construction.
+            payloadAtt = AddAttribute<PayloadAttribute>();
+        }
+
+        /**
+         * Advances the wrapped stream; on success refills the whole buffer with random
+         * bytes, picks a new random payload length and attaches the payload to the
+         * current token.
+         */
+        public override bool IncrementToken()
+        {
+            if (input.IncrementToken())
+            {
+                // Random.NextBytes only fills byte[]; the raw bytes are then block-copied
+                // into the sbyte[] that was handed to the BytesRef.
+                byte[] b = new byte[MAXLENGTH];
+                random.NextBytes(b);
+                Buffer.BlockCopy(b, 0, bytes, 0, b.Length);
+                payload.length = random.Next(MAXLENGTH); // 0 .. MAXLENGTH - 1
+                payloadAtt.Payload = payload;
+                return true;
+            }
+            else
+            {
+                return false;
+            }
+        }
+    }
+}

http://git-wip-us.apache.org/repos/asf/lucenenet/blob/ac621558/test/test-framework/Lucene.Net.TestFramework.csproj
----------------------------------------------------------------------
diff --git a/test/test-framework/Lucene.Net.TestFramework.csproj b/test/test-framework/Lucene.Net.TestFramework.csproj
index 6b31aaa..b0c3d77 100644
--- a/test/test-framework/Lucene.Net.TestFramework.csproj
+++ b/test/test-framework/Lucene.Net.TestFramework.csproj
@@ -56,7 +56,10 @@
     <Reference Include="System.Xml" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="Analysis\MockFixedLengthPayloadFilter.cs" />
+    <Compile Include="Analysis\MockTokenFilter.cs" />
     <Compile Include="Analysis\MockTokenizer.cs" />
+    <Compile Include="Analysis\MockVariableLengthPayloadFilter.cs" />
     <Compile Include="JavaCompatibility\LuceneTestCase.cs" />
     <Compile Include="JavaCompatibility\LuceneTypesHelpers.cs" />
     <Compile Include="JavaCompatibility\SystemTypesHelpers.cs" />


Mime
View raw message