Author: jonesde
Date: Mon Dec 11 00:57:02 2006
New Revision: 485561
URL: http://svn.apache.org/viewvc?view=rev&rev=485561
Log:
Refactored KeywordSearch class to move some more generic search code and properties to the
common component
Added:
incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties (with props)
incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java (with
props)
Modified:
incubator/ofbiz/trunk/applications/product/config/prodsearch.properties
incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java
Modified: incubator/ofbiz/trunk/applications/product/config/prodsearch.properties
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/config/prodsearch.properties?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/config/prodsearch.properties (original)
+++ incubator/ofbiz/trunk/applications/product/config/prodsearch.properties Mon Dec 11 00:57:02
2006
@@ -1,5 +1,4 @@
#####################################################################
-#
# Copyright 2001-2006 The Apache Software Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
@@ -15,23 +14,8 @@
# under the License.
#####################################################################
####
-# OFBiz Search Settings
+# OFBiz Product Search Settings
####
-
-# The stop word bags contain words to be removed from search keyword list
-# These should be colon separated and the list should start and end with colons
-# The words should all be lower case
-# The .or is for OR searches and the .and for AND searches
-stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
-stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
-
-# The stem bag is used to remove suffixes from words passed in the search string and found
while indexing
-# IF the remove.stems properties is true
-remove.stems=true
-stem.bag=:s:ies:y:
-
-# Characters that should be used as token separators when pulling out keywords
-index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_
# Assign a weight to each product keyword source during indexing/keyword induction
index.weight.Product.productId=1
Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
(original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordIndex.java
Mon Dec 11 00:57:02 2006
@@ -32,6 +32,7 @@
import org.ofbiz.base.util.UtilDateTime;
import org.ofbiz.base.util.UtilMisc;
import org.ofbiz.base.util.UtilProperties;
+import org.ofbiz.common.KeywordSearchUtil;
import org.ofbiz.content.data.DataResourceWorker;
import org.ofbiz.entity.GenericDelegator;
import org.ofbiz.entity.GenericEntityException;
@@ -68,11 +69,11 @@
String productId = product.getString("productId");
// get these in advance just once since they will be used many times for the multiple
strings to index
- String separators = KeywordSearch.getSeparators();
- String stopWordBagOr = KeywordSearch.getStopWordBagOr();
- String stopWordBagAnd = KeywordSearch.getStopWordBagAnd();
- boolean removeStems = KeywordSearch.getRemoveStems();
- Set stemSet = KeywordSearch.getStemSet();
+ String separators = KeywordSearchUtil.getSeparators();
+ String stopWordBagOr = KeywordSearchUtil.getStopWordBagOr();
+ String stopWordBagAnd = KeywordSearchUtil.getStopWordBagAnd();
+ boolean removeStems = KeywordSearchUtil.getRemoveStems();
+ Set stemSet = KeywordSearchUtil.getStemSet();
Map keywords = new TreeMap();
List strings = new ArrayList(50);
@@ -176,7 +177,7 @@
while (strIter.hasNext()) {
String str = (String) strIter.next();
// call process keywords method here
- KeywordSearch.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd,
stopWordBagOr, removeStems, stemSet);
+ KeywordSearchUtil.processKeywordsForIndex(str, keywords, separators, stopWordBagAnd,
stopWordBagOr, removeStems, stemSet);
}
List toBeStored = new LinkedList();
Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
(original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/KeywordSearch.java
Mon Dec 11 00:57:02 2006
@@ -16,227 +16,15 @@
*/
package org.ofbiz.product.product;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.StringTokenizer;
-import java.util.TreeSet;
-
-import org.ofbiz.base.util.Debug;
-import org.ofbiz.base.util.UtilMisc;
-import org.ofbiz.base.util.UtilProperties;
-import org.ofbiz.base.util.UtilValidate;
-import org.ofbiz.entity.GenericDelegator;
import org.ofbiz.entity.GenericEntityException;
import org.ofbiz.entity.GenericValue;
/**
- * Does a product search by keyword using the PRODUCT_KEYWORD table.
- * <br/>Special thanks to Glen Thorne and the Weblogic Commerce Server for ideas.
+ * These are left over utility methods from the product search code, just calling over to
KeywordIndex now; can probably remove soon.
*/
public class KeywordSearch {
public static final String module = KeywordSearch.class.getName();
-
- public static Set thesaurusRelsToInclude = new HashSet();
- public static Set thesaurusRelsForReplace = new HashSet();
-
- static {
- thesaurusRelsToInclude.add("KWTR_UF");
- thesaurusRelsToInclude.add("KWTR_USE");
- thesaurusRelsToInclude.add("KWTR_CS");
- thesaurusRelsToInclude.add("KWTR_NT");
- thesaurusRelsToInclude.add("KWTR_BT");
- thesaurusRelsToInclude.add("KWTR_RT");
-
- thesaurusRelsForReplace.add("KWTR_USE");
- thesaurusRelsForReplace.add("KWTR_CS");
- }
-
- public static String getSeparators() {
- // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
- String seps = UtilProperties.getPropertyValue("prodsearch", "index.keyword.separators",
";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
- return seps;
- }
-
- public static String getStopWordBagOr() {
- return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.or");
- }
- public static String getStopWordBagAnd() {
- return UtilProperties.getPropertyValue("prodsearch", "stop.word.bag.and");
- }
-
- public static boolean getRemoveStems() {
- String removeStemsStr = UtilProperties.getPropertyValue("prodsearch", "remove.stems");
- return "true".equals(removeStemsStr);
- }
- public static Set getStemSet() {
- String stemBag = UtilProperties.getPropertyValue("prodsearch", "stem.bag");
- Set stemSet = new TreeSet();
- if (UtilValidate.isNotEmpty(stemBag)) {
- String curToken;
- StringTokenizer tokenizer = new StringTokenizer(stemBag, ": ");
- while (tokenizer.hasMoreTokens()) {
- curToken = tokenizer.nextToken();
- stemSet.add(curToken);
- }
- }
- return stemSet;
- }
-
- public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean
anyPrefix, boolean anySuffix, boolean isAnd) {
- String separators = getSeparators();
- String stopWordBagOr = getStopWordBagOr();
- String stopWordBagAnd = getStopWordBagAnd();
-
- boolean removeStems = getRemoveStems();
- Set stemSet = getStemSet();
-
- processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, forSearch, anyPrefix, anySuffix, isAnd);
- }
-
- public static void processKeywordsForIndex(String str, Map keywords, String separators,
String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) {
- processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, false, false, false, false);
- }
-
- public static void processForKeywords(String str, Map keywords, String separators, String
stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch,
boolean anyPrefix, boolean anySuffix, boolean isAnd) {
- Set keywordSet = makeKeywordSet(str, separators, forSearch);
- fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, forSearch, anyPrefix, anySuffix, isAnd);
- }
-
- public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd,
String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix,
boolean anySuffix, boolean isAnd) {
- if (keywordSet == null) {
- return;
- }
-
- Iterator keywordIter = keywordSet.iterator();
- while (keywordIter.hasNext()) {
- String token = (String) keywordIter.next();
-
- // when cleaning up the tokens the ordering is important: check stop words, remove
stems, then get rid of 1 character tokens (1 digit okay)
-
- // check stop words
- String colonToken = ":" + token + ":";
- if (forSearch) {
- if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd
&& stopWordBagOr.indexOf(colonToken) >= 0)) {
- continue;
- }
- } else {
- if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken)
>= 0) {
- continue;
- }
- }
-
- // remove stems
- if (removeStems) {
- Iterator stemIter = stemSet.iterator();
- while (stemIter.hasNext()) {
- String stem = (String) stemIter.next();
- if (token.endsWith(stem)) {
- token = token.substring(0, token.length() - stem.length());
- }
- }
- }
-
- // get rid of all length 0 tokens now
- if (token.length() == 0) {
- continue;
- }
-
- // get rid of all length 1 character only tokens, pretty much useless
- if (token.length() == 1 && Character.isLetter(token.charAt(0))) {
- continue;
- }
-
- if (forSearch) {
- StringBuffer strSb = new StringBuffer();
- if (anyPrefix) strSb.append('%');
- strSb.append(token);
- if (anySuffix) strSb.append('%');
- // replace all %% with %
- int dblPercIdx = -1;
- while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
- //Debug.logInfo("before strSb: " + strSb, module);
- strSb.replace(dblPercIdx, dblPercIdx+2, "%");
- //Debug.logInfo("after strSb: " + strSb, module);
- }
- token = strSb.toString();
- }
-
- // group by word, add up weight
- Long curWeight = (Long) keywords.get(token);
- if (curWeight == null) {
- keywords.put(token, new Long(1));
- } else {
- keywords.put(token, new Long(curWeight.longValue() + 1));
- }
- }
- }
-
- public static Set makeKeywordSet(String str, String separators, boolean forSearch) {
- if (separators == null) separators = getSeparators();
-
- Set keywords = new TreeSet();
- if (str.length() > 0) {
- if (forSearch) {
- // remove %_*? from separators if is for a search
- StringBuffer sb = new StringBuffer(separators);
- if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%"));
- if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_"));
- if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*"));
- if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?"));
- separators = sb.toString();
- }
-
- StringTokenizer tokener = new StringTokenizer(str, separators, false);
- while (tokener.hasMoreTokens()) {
- // make sure it is lower case before doing anything else
- String token = tokener.nextToken().toLowerCase();
-
- if (forSearch) {
- // these characters will only be present if it is for a search, ie not
for indexing
- token = token.replace('*', '%');
- token = token.replace('?', '_');
- }
-
- keywords.add(token);
- }
- }
- return keywords;
- }
-
- public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix,
boolean removeStems, boolean isAnd) {
- Map keywords = new HashMap();
- fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems,
getStemSet(), true, anyPrefix, anySuffix, isAnd);
- return keywords.keySet();
- }
-
- public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator
delegator) {
- boolean replaceEnteredKeyword = false;
-
- try {
- List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword",
enteredKeyword));
- Iterator thesaurusIter = thesaurusList.iterator();
- while (thesaurusIter.hasNext()) {
- GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next();
- String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId");
- if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
- addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"),
null, true));
- if (thesaurusRelsForReplace.contains(relationshipEnumId)) {
- replaceEnteredKeyword = true;
- }
- }
- }
- } catch (GenericEntityException e) {
- Debug.logError(e, "Error expanding entered keyword", module);
- }
-
- Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet,
module);
- return replaceEnteredKeyword;
- }
public static void induceKeywords(GenericValue product) throws GenericEntityException
{
if (product == null) return;
Modified: incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java?view=diff&rev=485561&r1=485560&r2=485561
==============================================================================
--- incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java
(original)
+++ incubator/ofbiz/trunk/applications/product/src/org/ofbiz/product/product/ProductSearch.java
Mon Dec 11 00:57:02 2006
@@ -33,6 +33,7 @@
import org.ofbiz.base.util.UtilMisc;
import org.ofbiz.base.util.UtilProperties;
import org.ofbiz.base.util.UtilValidate;
+import org.ofbiz.common.KeywordSearchUtil;
import org.ofbiz.entity.GenericDelegator;
import org.ofbiz.entity.GenericEntityException;
import org.ofbiz.entity.GenericValue;
@@ -840,7 +841,7 @@
}
public Set makeFullKeywordSet(GenericDelegator delegator) {
- Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null, true);
+ Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null,
true);
Set fullKeywordSet = new TreeSet();
// expand the keyword list according to the thesaurus and create a new set of
keywords
@@ -848,7 +849,7 @@
while (keywordIter.hasNext()) {
String keyword = (String) keywordIter.next();
Set expandedSet = new TreeSet();
- boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword, expandedSet,
delegator);
+ boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword,
expandedSet, delegator);
fullKeywordSet.addAll(expandedSet);
if (!replaceEntered) {
fullKeywordSet.add(keyword);
@@ -867,18 +868,18 @@
//but then the sets should be and'ed to produce the overall expression; create
the SQL for this
//needs some work as the current method only support a list of and'ed words
and a list of or'ed words, not
//a list of or'ed sets to be and'ed together
- Set keywordSet = KeywordSearch.makeKeywordSet(this.keywordsString, null,
true);
+ Set keywordSet = KeywordSearchUtil.makeKeywordSet(this.keywordsString, null,
true);
// expand the keyword list according to the thesaurus and create a new set
of keywords
Iterator keywordIter = keywordSet.iterator();
while (keywordIter.hasNext()) {
String keyword = (String) keywordIter.next();
Set expandedSet = new TreeSet();
- boolean replaceEntered = KeywordSearch.expandKeywordForSearch(keyword,
expandedSet, productSearchContext.getDelegator());
+ boolean replaceEntered = KeywordSearchUtil.expandKeywordForSearch(keyword,
expandedSet, productSearchContext.getDelegator());
if (!replaceEntered) {
expandedSet.add(keyword);
}
- Set fixedSet = KeywordSearch.fixKeywordsForSearch(expandedSet, anyPrefix,
anySuffix, removeStems, isAnd);
+ Set fixedSet = KeywordSearchUtil.fixKeywordsForSearch(expandedSet, anyPrefix,
anySuffix, removeStems, isAnd);
Set fixedKeywordSet = new HashSet();
fixedKeywordSet.addAll(fixedSet);
productSearchContext.keywordFixedOrSetAndList.add(fixedKeywordSet);
@@ -886,7 +887,7 @@
} else {
// when isAnd is false, just add all of the new entries to the big list
Set keywordFirstPass = makeFullKeywordSet(productSearchContext.getDelegator());
// includes keyword expansion, etc
- Set keywordSet = KeywordSearch.fixKeywordsForSearch(keywordFirstPass, anyPrefix,
anySuffix, removeStems, isAnd);
+ Set keywordSet = KeywordSearchUtil.fixKeywordsForSearch(keywordFirstPass,
anyPrefix, anySuffix, removeStems, isAnd);
productSearchContext.orKeywordFixedSet.addAll(keywordSet);
}
Added: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties?view=auto&rev=485561
==============================================================================
--- incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties (added)
+++ incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties Mon Dec 11 00:57:02
2006
@@ -0,0 +1,33 @@
+#####################################################################
+# Copyright 2001-2006 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations
+# under the License.
+#####################################################################
+####
+# OFBiz General Keyword Search Settings
+####
+
+# The stop word bags contain words to be removed from search keyword list
+# These should be colon separated and the list should start and end with colons
+# The words should all be lower case
+# The .or is for OR searches and the .and for AND searches
+stop.word.bag.or=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
+stop.word.bag.and=:the:and:or:not:if:is:it:of:to:a:as:at:in:into:on:onto:so:but:me:you:your:yes:no:this:that:there:their:because:for:while:with:without:get:put:have:has:do:does:same:different:use:using:
+
+# The stem bag is used to remove suffixes from words passed in the search string and found
while indexing
+# IF the remove.stems properties is true
+remove.stems=true
+stem.bag=:s:ies:y:
+
+# Characters that should be used as token separators when pulling out keywords
+index.keyword.separators=;: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_
Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
svn:keywords = "Date Rev Author URL Id"
Propchange: incubator/ofbiz/trunk/framework/common/config/keywordsearch.properties
------------------------------------------------------------------------------
svn:mime-type = text/plain
Added: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
URL: http://svn.apache.org/viewvc/incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java?view=auto&rev=485561
==============================================================================
--- incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java (added)
+++ incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java Mon
Dec 11 00:57:02 2006
@@ -0,0 +1,239 @@
+/*
+ *
+ * Copyright 2001-2006 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License"); you may not
+ * use this file except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ */
+package org.ofbiz.common;
+
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.StringTokenizer;
+import java.util.TreeSet;
+
+import org.ofbiz.base.util.Debug;
+import org.ofbiz.base.util.UtilMisc;
+import org.ofbiz.base.util.UtilProperties;
+import org.ofbiz.base.util.UtilValidate;
+import org.ofbiz.entity.GenericDelegator;
+import org.ofbiz.entity.GenericEntityException;
+import org.ofbiz.entity.GenericValue;
+
+/**
+ * A few utility methods related to Keyword Search.
+ */
+public class KeywordSearchUtil {
+
+ public static final String module = KeywordSearchUtil.class.getName();
+
+ public static Set thesaurusRelsToInclude = new HashSet();
+ public static Set thesaurusRelsForReplace = new HashSet();
+
+ static {
+ thesaurusRelsToInclude.add("KWTR_UF");
+ thesaurusRelsToInclude.add("KWTR_USE");
+ thesaurusRelsToInclude.add("KWTR_CS");
+ thesaurusRelsToInclude.add("KWTR_NT");
+ thesaurusRelsToInclude.add("KWTR_BT");
+ thesaurusRelsToInclude.add("KWTR_RT");
+
+ thesaurusRelsForReplace.add("KWTR_USE");
+ thesaurusRelsForReplace.add("KWTR_CS");
+ }
+
+ public static String getSeparators() {
+ // String separators = ";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_";
+ String seps = UtilProperties.getPropertyValue("keywordsearch", "index.keyword.separators",
";: ,.!?\t\"\'\r\n\\/()[]{}*%<>-+_");
+ return seps;
+ }
+
+ public static String getStopWordBagOr() {
+ return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.or");
+ }
+ public static String getStopWordBagAnd() {
+ return UtilProperties.getPropertyValue("keywordsearch", "stop.word.bag.and");
+ }
+
+ public static boolean getRemoveStems() {
+ String removeStemsStr = UtilProperties.getPropertyValue("keywordsearch", "remove.stems");
+ return "true".equals(removeStemsStr);
+ }
+ public static Set getStemSet() {
+ String stemBag = UtilProperties.getPropertyValue("keywordsearch", "stem.bag");
+ Set stemSet = new TreeSet();
+ if (UtilValidate.isNotEmpty(stemBag)) {
+ String curToken;
+ StringTokenizer tokenizer = new StringTokenizer(stemBag, ": ");
+ while (tokenizer.hasMoreTokens()) {
+ curToken = tokenizer.nextToken();
+ stemSet.add(curToken);
+ }
+ }
+ return stemSet;
+ }
+
+ public static void processForKeywords(String str, Map keywords, boolean forSearch, boolean
anyPrefix, boolean anySuffix, boolean isAnd) {
+ String separators = getSeparators();
+ String stopWordBagOr = getStopWordBagOr();
+ String stopWordBagAnd = getStopWordBagAnd();
+
+ boolean removeStems = getRemoveStems();
+ Set stemSet = getStemSet();
+
+ processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, forSearch, anyPrefix, anySuffix, isAnd);
+ }
+
+ public static void processKeywordsForIndex(String str, Map keywords, String separators,
String stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet) {
+ processForKeywords(str, keywords, separators, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, false, false, false, false);
+ }
+
+ public static void processForKeywords(String str, Map keywords, String separators, String
stopWordBagAnd, String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch,
boolean anyPrefix, boolean anySuffix, boolean isAnd) {
+ Set keywordSet = makeKeywordSet(str, separators, forSearch);
+ fixupKeywordSet(keywordSet, keywords, stopWordBagAnd, stopWordBagOr, removeStems,
stemSet, forSearch, anyPrefix, anySuffix, isAnd);
+ }
+
+ public static void fixupKeywordSet(Set keywordSet, Map keywords, String stopWordBagAnd,
String stopWordBagOr, boolean removeStems, Set stemSet, boolean forSearch, boolean anyPrefix,
boolean anySuffix, boolean isAnd) {
+ if (keywordSet == null) {
+ return;
+ }
+
+ Iterator keywordIter = keywordSet.iterator();
+ while (keywordIter.hasNext()) {
+ String token = (String) keywordIter.next();
+
+ // when cleaning up the tokens the ordering is important: check stop words, remove
stems, then get rid of 1 character tokens (1 digit okay)
+
+ // check stop words
+ String colonToken = ":" + token + ":";
+ if (forSearch) {
+ if ((isAnd && stopWordBagAnd.indexOf(colonToken) >= 0) || (!isAnd
&& stopWordBagOr.indexOf(colonToken) >= 0)) {
+ continue;
+ }
+ } else {
+ if (stopWordBagOr.indexOf(colonToken) >= 0 && stopWordBagAnd.indexOf(colonToken)
>= 0) {
+ continue;
+ }
+ }
+
+ // remove stems
+ if (removeStems) {
+ Iterator stemIter = stemSet.iterator();
+ while (stemIter.hasNext()) {
+ String stem = (String) stemIter.next();
+ if (token.endsWith(stem)) {
+ token = token.substring(0, token.length() - stem.length());
+ }
+ }
+ }
+
+ // get rid of all length 0 tokens now
+ if (token.length() == 0) {
+ continue;
+ }
+
+ // get rid of all length 1 character only tokens, pretty much useless
+ if (token.length() == 1 && Character.isLetter(token.charAt(0))) {
+ continue;
+ }
+
+ if (forSearch) {
+ StringBuffer strSb = new StringBuffer();
+ if (anyPrefix) strSb.append('%');
+ strSb.append(token);
+ if (anySuffix) strSb.append('%');
+ // replace all %% with %
+ int dblPercIdx = -1;
+ while ((dblPercIdx = strSb.indexOf("%%")) >= 0) {
+ //Debug.logInfo("before strSb: " + strSb, module);
+ strSb.replace(dblPercIdx, dblPercIdx+2, "%");
+ //Debug.logInfo("after strSb: " + strSb, module);
+ }
+ token = strSb.toString();
+ }
+
+ // group by word, add up weight
+ Long curWeight = (Long) keywords.get(token);
+ if (curWeight == null) {
+ keywords.put(token, new Long(1));
+ } else {
+ keywords.put(token, new Long(curWeight.longValue() + 1));
+ }
+ }
+ }
+
+ public static Set makeKeywordSet(String str, String separators, boolean forSearch) {
+ if (separators == null) separators = getSeparators();
+
+ Set keywords = new TreeSet();
+ if (str.length() > 0) {
+ if (forSearch) {
+ // remove %_*? from separators if is for a search
+ StringBuffer sb = new StringBuffer(separators);
+ if (sb.indexOf("%") >= 0) sb.deleteCharAt(sb.indexOf("%"));
+ if (sb.indexOf("_") >= 0) sb.deleteCharAt(sb.indexOf("_"));
+ if (sb.indexOf("*") >= 0) sb.deleteCharAt(sb.indexOf("*"));
+ if (sb.indexOf("?") >= 0) sb.deleteCharAt(sb.indexOf("?"));
+ separators = sb.toString();
+ }
+
+ StringTokenizer tokener = new StringTokenizer(str, separators, false);
+ while (tokener.hasMoreTokens()) {
+ // make sure it is lower case before doing anything else
+ String token = tokener.nextToken().toLowerCase();
+
+ if (forSearch) {
+ // these characters will only be present if it is for a search, ie not
for indexing
+ token = token.replace('*', '%');
+ token = token.replace('?', '_');
+ }
+
+ keywords.add(token);
+ }
+ }
+ return keywords;
+ }
+
+ public static Set fixKeywordsForSearch(Set keywordSet, boolean anyPrefix, boolean anySuffix,
boolean removeStems, boolean isAnd) {
+ Map keywords = new HashMap();
+ fixupKeywordSet(keywordSet, keywords, getStopWordBagAnd(), getStopWordBagOr(), removeStems,
getStemSet(), true, anyPrefix, anySuffix, isAnd);
+ return keywords.keySet();
+ }
+
+ public static boolean expandKeywordForSearch(String enteredKeyword, Set addToSet, GenericDelegator
delegator) {
+ boolean replaceEnteredKeyword = false;
+
+ try {
+ List thesaurusList = delegator.findByAndCache("KeywordThesaurus", UtilMisc.toMap("enteredKeyword",
enteredKeyword));
+ Iterator thesaurusIter = thesaurusList.iterator();
+ while (thesaurusIter.hasNext()) {
+ GenericValue keywordThesaurus = (GenericValue) thesaurusIter.next();
+ String relationshipEnumId = (String) keywordThesaurus.get("relationshipEnumId");
+ if (thesaurusRelsToInclude.contains(relationshipEnumId)) {
+ addToSet.addAll(makeKeywordSet(keywordThesaurus.getString("alternateKeyword"),
null, true));
+ if (thesaurusRelsForReplace.contains(relationshipEnumId)) {
+ replaceEnteredKeyword = true;
+ }
+ }
+ }
+ } catch (GenericEntityException e) {
+ Debug.logError(e, "Error expanding entered keyword", module);
+ }
+
+ Debug.logInfo("Expanded keyword [" + enteredKeyword + "], got set: " + addToSet,
module);
+ return replaceEnteredKeyword;
+ }
+}
Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
svn:keywords = "Date Rev Author URL Id"
Propchange: incubator/ofbiz/trunk/framework/common/src/org/ofbiz/common/KeywordSearchUtil.java
------------------------------------------------------------------------------
svn:mime-type = text/plain
|