lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sim...@apache.org
Subject svn commit: r1455325 - in /lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous: PatternKeywordMarkerFilter.java SetKeywordMarkerFilter.java
Date Mon, 11 Mar 2013 20:51:30 GMT
Author: simonw
Date: Mon Mar 11 20:51:29 2013
New Revision: 1455325

URL: http://svn.apache.org/r1455325
Log:
LUCENE-4822: Add missing files - I blame git...

Added:
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
  (with props)
    lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
  (with props)

Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java?rev=1455325&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
(added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/PatternKeywordMarkerFilter.java
Mon Mar 11 20:51:29 2013
@@ -0,0 +1,56 @@
+package org.apache.lucene.analysis.miscellaneous;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+
+/**
+ * Marks terms as keywords via the {@link KeywordAttribute}. Each token whose
+ * entire term text matches the provided pattern is marked as a keyword by
+ * setting {@link KeywordAttribute#setKeyword(boolean)} to <code>true</code>.
+ */
+public final class PatternKeywordMarkerFilter extends KeywordMarkerFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final Matcher matcher;
+  
+  /**
+   * Create a new {@link PatternKeywordMarkerFilter} that marks the current
+   * token as a keyword, via the {@link KeywordAttribute}, if the token's
+   * term buffer matches the provided {@link Pattern}.
+   * 
+   * @param in
+   *          TokenStream to filter
+   * @param pattern
+   *          the pattern to apply to the incoming term buffer
+   */
+  public PatternKeywordMarkerFilter(TokenStream in, Pattern pattern) {
+    super(in);
+    this.matcher = pattern.matcher(""); // one Matcher, re-targeted per token in isKeyword()
+  }
+  
+  @Override
+  protected boolean isKeyword() {
+    matcher.reset(termAtt); // point the reused Matcher at the current term text
+    return matcher.matches(); // matches() requires the whole term to match, not a substring
+  }
+  
+}

Added: lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java?rev=1455325&view=auto
==============================================================================
--- lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
(added)
+++ lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/SetKeywordMarkerFilter.java
Mon Mar 11 20:51:29 2013
@@ -0,0 +1,52 @@
+package org.apache.lucene.analysis.miscellaneous;
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
+
+/**
+ * Marks terms as keywords via the {@link KeywordAttribute}. Every token
+ * found in the supplied set is flagged as a keyword by setting
+ * {@link KeywordAttribute#setKeyword(boolean)} to <code>true</code>.
+ */
+public final class SetKeywordMarkerFilter extends KeywordMarkerFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final CharArraySet keywordSet;
+
+  /**
+   * Create a new {@link SetKeywordMarkerFilter} that marks the current token
+   * as a keyword, via the {@link KeywordAttribute}, if the token's term
+   * buffer is contained in the given set.
+   * 
+   * @param in TokenStream to filter
+   * @param keywordSet the keyword set in which the current term buffer is looked up
+   */
+  public SetKeywordMarkerFilter(final TokenStream in, final CharArraySet keywordSet) {
+    super(in);
+    this.keywordSet = keywordSet;
+  }
+
+  @Override
+  protected boolean isKeyword() {
+    final char[] termBuffer = termAtt.buffer();
+    final int termLength = termAtt.length();
+    return keywordSet.contains(termBuffer, 0, termLength);
+  }
+  
+}



Mime
View raw message