Return-Path: X-Original-To: apmail-accumulo-commits-archive@www.apache.org Delivered-To: apmail-accumulo-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 3422710870 for ; Tue, 26 Nov 2013 15:19:08 +0000 (UTC) Received: (qmail 63737 invoked by uid 500); 26 Nov 2013 15:18:40 -0000 Delivered-To: apmail-accumulo-commits-archive@accumulo.apache.org Received: (qmail 62974 invoked by uid 500); 26 Nov 2013 15:18:16 -0000 Mailing-List: contact commits-help@accumulo.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@accumulo.apache.org Delivered-To: mailing list commits@accumulo.apache.org Received: (qmail 62299 invoked by uid 99); 26 Nov 2013 15:18:08 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Tue, 26 Nov 2013 15:18:08 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 53B9E9184FF; Tue, 26 Nov 2013 15:17:56 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: ecn@apache.org To: commits@accumulo.apache.org Date: Tue, 26 Nov 2013 15:18:03 -0000 Message-Id: <58a84b3665bd490d838d56749251659d@git.apache.org> In-Reply-To: <35172c83b6424670a9d2ec8231f9f5b0@git.apache.org> References: <35172c83b6424670a9d2ec8231f9f5b0@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [08/37] ACCUMULO-600 removed wikisearch from trunk http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/EventFields.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/EventFields.java b/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/EventFields.java deleted file mode 100644 index bd43088..0000000 --- a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/EventFields.java +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.parser; - -import java.nio.ByteBuffer; -import java.util.Collection; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import org.apache.accumulo.core.security.ColumnVisibility; -import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue; - -import com.esotericsoftware.kryo.CustomSerialization; -import com.esotericsoftware.kryo.Kryo; -import com.esotericsoftware.kryo.serialize.ArraySerializer; -import com.esotericsoftware.kryo.serialize.IntSerializer; -import com.esotericsoftware.kryo.serialize.StringSerializer; -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; -import com.google.common.collect.Multiset; -import com.google.common.collect.SetMultimap; - -/** - * Object used to hold the fields in an event. This is a multimap because fields can be repeated. - */ -public class EventFields implements SetMultimap, CustomSerialization { - - private static boolean kryoInitialized = false; - private static ArraySerializer valueSerializer = null; - - private Multimap map = null; - - public static class FieldValue { - ColumnVisibility visibility; - byte[] value; - - public FieldValue(ColumnVisibility visibility, byte[] value) { - super(); - this.visibility = visibility; - this.value = value; - } - - public ColumnVisibility getVisibility() { - return visibility; - } - - public byte[] getValue() { - return value; - } - - public void setVisibility(ColumnVisibility visibility) { - this.visibility = visibility; - } - - public void setValue(byte[] value) { - this.value = value; - } - - public int size() { - return visibility.flatten().length + value.length; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(); - if (null != visibility) - buf.append(" visibility: ").append(new String(visibility.flatten())); - if (null != value) - buf.append(" value size: ").append(value.length); - if (null != value) - buf.append(" value: ").append(new String(value)); - return buf.toString(); - } - - } - - public EventFields() { - map = HashMultimap.create(); - } - - public int size() { - return map.size(); - } - - public boolean isEmpty() { - return map.isEmpty(); - } - - public boolean containsKey(Object key) { - return map.containsKey(key); - } - - public boolean containsValue(Object value) { - return map.containsValue(value); - } - - public boolean containsEntry(Object key, Object value) { - return map.containsEntry(key, value); - } - - public boolean put(String key, FieldValue value) { - return map.put(key, value); - } - - public boolean remove(Object key, Object value) { - return map.remove(key, value); - } - - public boolean putAll(String key, Iterable values) { - return map.putAll(key, values); - } - - public boolean putAll(Multimap multimap) { - return map.putAll(multimap); - } - - public void clear() { - map.clear(); - } - - public Set keySet() { - return map.keySet(); - } - - public Multiset keys() { - return map.keys(); - } - - public Collection values() { - return map.values(); - } - - public Set get(String key) { - return (Set) map.get(key); - } - - public Set removeAll(Object key) { - return (Set) map.removeAll(key); - } - - public Set replaceValues(String key, Iterable values) { - return (Set) map.replaceValues(key, values); - } - - public Set> entries() { - return (Set>) map.entries(); - } - - public Map> asMap() { - return map.asMap(); - } - - public int getByteSize() { - int count = 0; - for (Entry e : map.entries()) { - count += e.getKey().getBytes().length + e.getValue().size(); - } - return count; - } - - @Override - public String toString() { - StringBuilder buf = new StringBuilder(); - for (Entry entry : map.entries()) { - buf.append("\tkey: ").append(entry.getKey()).append(" -> ").append(entry.getValue().toString()).append("\n"); - } - return buf.toString(); - } - - public static synchronized void initializeKryo(Kryo kryo) { - if (kryoInitialized) - return; - valueSerializer = new ArraySerializer(kryo); - valueSerializer.setDimensionCount(1); - valueSerializer.setElementsAreSameType(true); - valueSerializer.setCanBeNull(false); - valueSerializer.setElementsCanBeNull(false); - kryo.register(byte[].class, valueSerializer); - kryoInitialized = true; - } - - public void readObjectData(Kryo kryo, ByteBuffer buf) { - if (!kryoInitialized) - EventFields.initializeKryo(kryo); - // Read in the number of map entries - int entries = IntSerializer.get(buf, true); - for (int i = 0; i < entries; i++) { - // Read in the key - String key = StringSerializer.get(buf); - // Read in the fields in the value - ColumnVisibility vis = new ColumnVisibility(valueSerializer.readObjectData(buf, byte[].class)); - byte[] value = valueSerializer.readObjectData(buf, byte[].class); - map.put(key, new FieldValue(vis, value)); - } - - } - - public void writeObjectData(Kryo kryo, ByteBuffer buf) { - if (!kryoInitialized) - EventFields.initializeKryo(kryo); - // Write out the number of entries; - IntSerializer.put(buf, map.size(), true); - for (Entry entry : map.entries()) { - // Write the key - StringSerializer.put(buf, entry.getKey()); - // Write the fields in the value - valueSerializer.writeObjectData(buf, entry.getValue().getVisibility().flatten()); - valueSerializer.writeObjectData(buf, entry.getValue().getValue()); - } - } - -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java b/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java deleted file mode 100644 index acfb4f4..0000000 --- a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java +++ /dev/null @@ -1,1139 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.parser; - -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.Enumeration; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - -import javax.swing.tree.DefaultMutableTreeNode; - -import org.apache.accumulo.examples.wikisearch.parser.QueryParser.QueryTerm; -import org.apache.accumulo.examples.wikisearch.parser.RangeCalculator.RangeBounds; -import org.apache.commons.jexl2.parser.ASTAndNode; -import org.apache.commons.jexl2.parser.ASTEQNode; -import org.apache.commons.jexl2.parser.ASTERNode; -import org.apache.commons.jexl2.parser.ASTGENode; -import org.apache.commons.jexl2.parser.ASTGTNode; -import org.apache.commons.jexl2.parser.ASTJexlScript; -import org.apache.commons.jexl2.parser.ASTLENode; -import org.apache.commons.jexl2.parser.ASTLTNode; -import org.apache.commons.jexl2.parser.ASTNENode; -import org.apache.commons.jexl2.parser.ASTNRNode; -import org.apache.commons.jexl2.parser.ASTNotNode; -import org.apache.commons.jexl2.parser.ASTOrNode; -import org.apache.commons.jexl2.parser.ParseException; -import org.apache.commons.jexl2.parser.ParserTreeConstants; -import org.apache.hadoop.io.Text; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - - -import com.google.common.collect.HashMultimap; -import com.google.common.collect.Multimap; - -/** - * The server-side field index queries can only support operations on indexed fields. Additionally, queries that have differing ranges (i.e. one range at the - * fieldname level and another at the fieldValue level) are not currently supported. This class removes these conflicts from the query as well as sets proper - * capitalization configurations etc. - * - * Once the query has been modified, you can pass it to the BooleanLogicIterator on the server-side via the options map. - * - */ -public class FieldIndexQueryReWriter { - - protected static final Logger log = Logger.getLogger(FieldIndexQueryReWriter.class); - public static final String INDEXED_TERMS_LIST = "INDEXED_TERMS_LIST"; // comma separated list of indexed terms. - public static Set rangeNodeSet; - - static { - rangeNodeSet = new HashSet(); - rangeNodeSet.add(ParserTreeConstants.JJTLENODE); - rangeNodeSet.add(ParserTreeConstants.JJTLTNODE); - rangeNodeSet.add(ParserTreeConstants.JJTGENODE); - rangeNodeSet.add(ParserTreeConstants.JJTGTNODE); - rangeNodeSet = Collections.unmodifiableSet(rangeNodeSet); - } - - /* - * Given a JEXL Query, rewrite it and return it. - * - * 1. ParseQuery 2. Transform query 3. Refactor query 4. remove non-indexed terms a. remove any tree conflicts b. collapse any branches 7. add normalized - * values 8. adjust for case sensitivity 9. add prefix.. but jexl chokes on null byte - */ - public static void setLogLevel(Level lev) { - log.setLevel(lev); - } - - /** - * - * @param query - * @param options - * @return String representation of a given query. - * @throws ParseException - * @throws Exception - */ - public String removeNonIndexedTermsAndInvalidRanges(String query, Map options) throws ParseException, Exception { - Multimap indexedTerms = parseIndexedTerms(options); - RewriterTreeNode node = parseJexlQuery(query); - if (log.isDebugEnabled()) { - log.debug("Tree: " + node.getContents()); - } - node = removeNonIndexedTerms(node, indexedTerms); - node = removeTreeConflicts(node, indexedTerms); - node = collapseBranches(node); - node = removeNegationViolations(node); - - if (log.isDebugEnabled()) { - log.debug("Tree -NonIndexed: " + node.getContents()); - } - return rebuildQueryFromTree(node); - } - - /** - * - * @param query - * @param options - * @return String representation of a given query. - * @throws ParseException - * @throws Exception - */ - public String applyNormalizedTerms(String query, Map options) throws ParseException, Exception { - if (log.isDebugEnabled()) { - log.debug("applyNormalizedTerms, query: " + query); - } - Multimap normalizedTerms = parseIndexedTerms(options); - RewriterTreeNode node = parseJexlQuery(query); - if (log.isDebugEnabled()) { - log.debug("applyNormalizedTerms, Tree: " + node.getContents()); - } - node = orNormalizedTerms(node, normalizedTerms); - if (log.isDebugEnabled()) { - log.debug("applyNormalizedTerms,Normalized: " + node.getContents()); - } - return rebuildQueryFromTree(node); - } - - /** - * - * @param query - * @param fNameUpper - * @param fValueUpper - * @return String representation of a given query. - * @throws ParseException - */ - public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper) throws ParseException { - RewriterTreeNode node = parseJexlQuery(query); - if (log.isDebugEnabled()) { - log.debug("Tree: " + node.getContents()); - } - node = applyCaseSensitivity(node, fNameUpper, fValueUpper); - if (log.isDebugEnabled()) { - log.debug("Case: " + node.getContents()); - } - return rebuildQueryFromTree(node); - } - - private String rebuildQueryFromTree(RewriterTreeNode node) { - if (node.isLeaf()) { - String fName = node.getFieldName(); - String fValue = node.getFieldValue(); - String operator = node.getOperator(); - if (node.isNegated()) { - if (node.getType() == JexlOperatorConstants.JJTEQNODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTNENODE); - } else if (node.getType() == JexlOperatorConstants.JJTERNODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTNRNODE); - } else if (node.getType() == JexlOperatorConstants.JJTLTNODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTGENODE); - } else if (node.getType() == JexlOperatorConstants.JJTLENODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTGTNODE); - } else if (node.getType() == JexlOperatorConstants.JJTGTNODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTLENODE); - } else if (node.getType() == JexlOperatorConstants.JJTGENODE) { - operator = JexlOperatorConstants.getOperator(JexlOperatorConstants.JJTLTNODE); - } - } - return fName + operator + "'" + fValue + "'"; - } else { - List parts = new ArrayList(); - Enumeration children = node.children(); - while (children.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - parts.add(rebuildQueryFromTree(child)); - } - if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { - return org.apache.commons.lang.StringUtils.join(parts, ""); - } - String op = " " + JexlOperatorConstants.getOperator(node.getType()) + " "; - if (log.isDebugEnabled()) { - log.debug("Operator: " + op); - } - String query = org.apache.commons.lang.StringUtils.join(parts, op); - query = "(" + query + ")"; - return query; - } - } - - /* - * Don't use this, Jexl currently chokes on null bytes in the query - */ - // public String applyFieldNamePrefix(String query, String prefix) throws ParseException { - // NuwaveTreeNode node = parseJexlQuery(query); - // if (log.isDebugEnabled()) { - // log.debug("Tree: " + node.getContents()); - // } - // node = applyFieldNamePrefix(node, prefix); - // if (log.isDebugEnabled()) { - // log.debug("Prefix: " + node.getContents()); - // } - // return null; - // } - private RewriterTreeNode parseJexlQuery(String query) throws ParseException { - if (log.isDebugEnabled()) { - log.debug("parseJexlQuery, query: " + query); - } - QueryParser parser = new QueryParser(); - parser.execute(query); - TreeNode tree = parser.getIteratorTree(); - RewriterTreeNode root = transformTreeNode(tree); - if (log.isDebugEnabled()) { - log.debug("parseJexlQuery, transformedTree: " + root.getContents()); - } - root = refactorTree(root); - if (log.isDebugEnabled()) { - log.debug("parseJexlQuery, refactorTree: " + root.getContents()); - } - return root; - } - - /* - * - */ - private RewriterTreeNode transformTreeNode(TreeNode node) throws ParseException { - if (node.getType().equals(ASTEQNode.class) || node.getType().equals(ASTNENode.class)) { - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, Equals Node"); - } - - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", ""); - boolean negated = t.getOperator().equals("!="); - RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated); - return child; - } - } - } - - if (node.getType().equals(ASTERNode.class) || node.getType().equals(ASTNRNode.class)) { - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, Regex Node"); - } - - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", ""); - boolean negated = node.getType().equals(ASTNRNode.class); - RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated); - return child; - } - } - } - - if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) || node.getType().equals(ASTGTNode.class) - || node.getType().equals(ASTGENode.class)) { - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, LT/LE/GT/GE node"); - } - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", "").toLowerCase(); - boolean negated = false; // to be negated, must be child of Not, which is handled elsewhere. - int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - RewriterTreeNode child = new RewriterTreeNode(mytype, fName, fValue, negated); - return child; - } - } - } - - RewriterTreeNode returnNode = null; - - if (node.getType().equals(ASTAndNode.class) || node.getType().equals(ASTOrNode.class)) { - int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE : ParserTreeConstants.JJTORNODE; - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, AND/OR node: " + parentType); - } - if (node.isLeaf() || !node.getTerms().isEmpty()) { - returnNode = new RewriterTreeNode(parentType); - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", ""); - boolean negated = t.getOperator().equals("!="); - int childType = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - RewriterTreeNode child = new RewriterTreeNode(childType, fName, fValue, negated); - if (log.isDebugEnabled()) { - log.debug("adding child node: " + child.getContents()); - } - returnNode.add(child); - } - } - } else { - returnNode = new RewriterTreeNode(parentType); - } - } else if (node.getType().equals(ASTNotNode.class)) { - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, NOT node"); - } - if (node.isLeaf()) { - // NOTE: this should be cleaned up a bit. - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", "").toLowerCase(); - boolean negated = !t.getOperator().equals("!="); - int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - return new RewriterTreeNode(mytype, fName, fValue, negated); - } - } - } else { - returnNode = new RewriterTreeNode(ParserTreeConstants.JJTNOTNODE); - } - } else if (node.getType().equals(ASTJexlScript.class) || node.getType().getSimpleName().equals("RootNode")) { - - if (log.isDebugEnabled()) { - log.debug("transformTreeNode, ROOT/JexlScript node"); - } - if (node.isLeaf()) { - returnNode = new RewriterTreeNode(ParserTreeConstants.JJTJEXLSCRIPT); - // NOTE: this should be cleaned up a bit. - Multimap terms = node.getTerms(); - for (String fName : terms.keySet()) { - Collection values = terms.get(fName); - for (QueryTerm t : values) { - if (null == t || null == t.getValue()) { - continue; - } - String fValue = t.getValue().toString(); - fValue = fValue.replaceAll("'", ""); - boolean negated = t.getOperator().equals("!="); - int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - RewriterTreeNode child = new RewriterTreeNode(mytype, fName, fValue, negated); - returnNode.add(child); - return returnNode; - } - } - } else { - returnNode = new RewriterTreeNode(ParserTreeConstants.JJTJEXLSCRIPT); - } - } else { - log.error("transformTreeNode, Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType()); - } - for (TreeNode child : node.getChildren()) { - returnNode.add(transformTreeNode(child)); - } - - return returnNode; - } - - private RewriterTreeNode removeNonIndexedTerms(RewriterTreeNode root, Multimap indexedTerms) throws Exception { - // public void removeNonIndexedTerms(BooleanLogicTreeNodeJexl myroot, String indexedTerms) throws Exception { - if (indexedTerms.isEmpty()) { - throw new Exception("removeNonIndexedTerms, indexed Terms empty"); - } - - // NOTE: doing a depth first enumeration didn't work when I started - // removing nodes halfway through. The following method does work, - // it's essentially a reverse breadth first traversal. - List nodes = new ArrayList(); - Enumeration bfe = root.breadthFirstEnumeration(); - - while (bfe.hasMoreElements()) { - RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); - nodes.add(node); - } - - // walk backwards - for (int i = nodes.size() - 1; i >= 0; i--) { - RewriterTreeNode node = nodes.get(i); - if (log.isDebugEnabled()) { - log.debug("removeNonIndexedTerms, analyzing node: " + node.toString() + " " + node.printNode()); - } - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { - // If all of your children are gone, AND/OR has no purpose, remove - if (node.getChildCount() == 0) { - node.removeFromParent(); - - // If AND/OR has only 1 child, attach it to the parent directly. - } else if (node.getChildCount() == 1) { - RewriterTreeNode p = (RewriterTreeNode) node.getParent(); - RewriterTreeNode c = (RewriterTreeNode) node.getFirstChild(); - node.removeFromParent(); - p.add(c); - } - } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { // Head node - // If head node has no children, we have nothing to search on. - if (node.getChildCount() == 0) { - throw new Exception(); - } - } else if (rangeNodeSet.contains(node.getType())) { // leave it alone - // leave ranges untouched, they'll be handled elsewhere. - continue; - } else { - if (log.isDebugEnabled()) { - log.debug("removeNonIndexedTerms, Testing: " + node.getFieldName() + ":" + node.getFieldValue()); - } - - if (!indexedTerms.containsKey(node.getFieldName().toString() + ":" + node.getFieldValue().toString())) { - if (log.isDebugEnabled()) { - log.debug(node.getFieldName() + ":" + node.getFieldValue() + " is NOT indexed"); - } - node.removeFromParent(); - } else { - if (log.isDebugEnabled()) { - log.debug(node.getFieldName() + ":" + node.getFieldValue() + " is indexed"); - } - } - } - } - - return root; - } - - private RewriterTreeNode orNormalizedTerms(RewriterTreeNode myroot, Multimap indexedTerms) throws Exception { - // we have multimap of FieldName to multiple FieldValues - if (indexedTerms.isEmpty()) { - throw new Exception("indexed Terms empty"); - } - try { - // NOTE: doing a depth first enumeration didn't work when I started - // removing nodes halfway through. The following method does work, - // it's essentially a reverse breadth first traversal. - List nodes = new ArrayList(); - Enumeration bfe = myroot.breadthFirstEnumeration(); - - while (bfe.hasMoreElements()) { - RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); - nodes.add(node); - } - - // walk backwards - for (int i = nodes.size() - 1; i >= 0; i--) { - RewriterTreeNode node = nodes.get(i); - if (log.isDebugEnabled()) { - log.debug("orNormalizedTerms, analyzing node: " + node.toString() + " " + node.printNode()); - } - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { - continue; - } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { - if (node.getChildCount() == 0) { - if (log.isDebugEnabled()) { - log.debug("orNormalizedTerms: Head node has no children!"); - } - throw new Exception(); // Head node has no children. - } - } else { - if (log.isDebugEnabled()) { - log.debug("Testing data location: " + node.getFieldName()); - } - String fName = node.getFieldName().toString(); - String fValue = node.getFieldValue().toString(); - if (indexedTerms.containsKey(fName + ":" + fValue)) { - - if (indexedTerms.get(fName + ":" + fValue).size() > 1) { - // Replace node with an OR, and make children from the multimap collection - node.setType(ParserTreeConstants.JJTORNODE); - boolean neg = node.isNegated(); - node.setNegated(false); - node.setFieldName(null); - node.setFieldValue(null); - Collection values = indexedTerms.get(fName + ":" + fValue); - for (String value : values) { - RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, value, neg); - node.add(n); - } - } else if (indexedTerms.get(fName + ":" + fValue).size() == 1) { - // Straight replace - Collection values = indexedTerms.get(fName + ":" + fValue); - for (String val : values) { - // should only be 1 - node.setFieldValue(val); - } - } - - } else { - // throw new Exception("orNormalizedTerms, encountered a non-indexed term: " + node.getFieldName().toString()); - } - } - } - } catch (Exception e) { - log.debug("Caught exception in orNormalizedTerms(): " + e); - throw new Exception("exception in: orNormalizedTerms"); - } - - return myroot; - } - - /*** - * We only want to pass ranges on if they meet a very narrow set of conditions. All ranges must be bounded i.e. x between(1,5) so their parent is an AND. We - * will only pass a range if 1. The AND is the direct child of HEAD node 2. The AND is a child of an OR which is a direct child of HEAD node. - * - * If there is an HEAD-AND[x,OR[b,AND[range]]], and you remove the range, this turns the tree into HEAD-AND[X,OR[B]] which becomes HEAD-AND[X,B] which will - * miss entries, so you need to cut out the entire OR at this point and let the positive side of the AND pick it up. - */ - private RewriterTreeNode removeTreeConflicts(RewriterTreeNode root, Multimap indexedTerms) { - if (log.isDebugEnabled()) { - log.debug("removeTreeConflicts"); - } - - /* - * You can't modify the enumeration, so save it into a list. We want to walk backwards in a breadthFirstEnumeration. So we don't throw null pointers when we - * erase nodes and shorten our list. - */ - List nodeList = new ArrayList(); - Enumeration nodes = root.breadthFirstEnumeration(); - while (nodes.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) nodes.nextElement(); - nodeList.add(child); - } - - // walk backwards - for (int i = nodeList.size() - 1; i >= 0; i--) { - RewriterTreeNode node = nodeList.get(i); - - if (node.isRemoval()) { - node.removeFromParent(); - continue; - } - - RewriterTreeNode parent = (RewriterTreeNode) node.getParent(); - /* - * All ranges must be bounded! This means the range must be part of an AND, and the parent of AND must be a HEAD node or an OR whose parent is a HEAD - * node. - */ - if (node.getType() == ParserTreeConstants.JJTANDNODE - && (node.getLevel() == 1 || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1))) { - - if (log.isDebugEnabled()) { - log.debug("AND at level 1 or with OR parent at level 1"); - } - Map rangeMap = getBoundedRangeMap(node); - - // can't modify the enumeration... save children to a list. - List childList = new ArrayList(); - Enumeration children = node.children(); - while (children.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - childList.add(child); - } - - for (int j = childList.size() - 1; j >= 0; j--) { - RewriterTreeNode child = childList.get(j); - // currently we are not allowing unbounded ranges, so they must sit under an AND node. - if (rangeNodeSet.contains(child.getType())) { - if (log.isDebugEnabled()) { - log.debug("child type: " + JexlOperatorConstants.getOperator(child.getType())); - } - if (rangeMap == null) { - // remove - child.removeFromParent(); - } else { - if (!rangeMap.containsKey(new Text(child.getFieldName()))) { - child.removeFromParent(); - } else { - // check if it has a single non-range sibling - boolean singleSib = false; - if (log.isDebugEnabled()) { - log.debug("checking for singleSib."); - } - Enumeration sibs = child.getParent().children(); - while (sibs.hasMoreElements()) { - RewriterTreeNode sib = (RewriterTreeNode) sibs.nextElement(); - if (!rangeNodeSet.contains(sib.getType())) { - singleSib = true; - break; - } - } - if (singleSib) { - child.removeFromParent(); - } else { - if (indexedTerms.containsKey(child.getFieldName() + ":" + child.getFieldValue())) { - if (log.isDebugEnabled()) { - log.debug("removeTreeConflicts, node: " + node.getContents()); - } - // swap parent AND with an OR - node.removeAllChildren(); - node.setType(ParserTreeConstants.JJTORNODE); - - Collection values = indexedTerms.get(child.getFieldName() + ":" + child.getFieldValue()); - for (String value : values) { - RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, child.getFieldName(), value, child.isNegated()); - node.add(n); - } - if (log.isDebugEnabled()) { - log.debug("removeTreeConflicts, node: " + node.getContents()); - } - - break; - } else { - child.removeFromParent(); - } - - } - } - } - } - }// end inner for - - } else { // remove all ranges! - if (node.isLeaf()) { - continue; - } - // can't modify the enumeration... - List childList = new ArrayList(); - Enumeration children = node.children(); - while (children.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - childList.add(child); - } - - // walk backwards - for (int j = childList.size() - 1; j >= 0; j--) { - - RewriterTreeNode child = childList.get(j); - if (log.isDebugEnabled()) { - log.debug("removeTreeConflicts, looking at node: " + node); - } - if (rangeNodeSet.contains(child.getType())) { - // if grand parent is an OR and not top level, mark whole thing for removal. - RewriterTreeNode grandParent = (RewriterTreeNode) child.getParent().getParent(); - if (grandParent.getType() == ParserTreeConstants.JJTORNODE && grandParent.getLevel() != 1) { - grandParent.setRemoval(true); - } - child.removeFromParent(); - } - } - } - - }// end outer for - - return root; - } - - private RewriterTreeNode removeNegationViolations(RewriterTreeNode node) throws Exception { - // Double check the top level node for negation violations - // if AND, one child must be positive, if OR, no negatives allowed. - RewriterTreeNode one = (RewriterTreeNode) node.getFirstChild(); // Head node has only 1 child. - ArrayList childrenList = new ArrayList(); - Enumeration children = one.children(); - while (children.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - childrenList.add(child); - } - if (one.getType() == JexlOperatorConstants.JJTORNODE) { - for (RewriterTreeNode child : childrenList) { - if (child.isNegated()) { - child.removeFromParent(); - } - } - if (one.getChildCount() == 0) { - throw new Exception("FieldIndexQueryReWriter: Top level query node cannot be processed."); - } - } else if (one.getType() == JexlOperatorConstants.JJTANDNODE) { - boolean ok = false; - for (RewriterTreeNode child : childrenList) { - if (!child.isNegated()) { - ok = true; - break; - } - } - if (!ok) { - throw new Exception("FieldIndexQueryReWriter: Top level query node cannot be processed."); - } - } - - return node; - } - - // After tree conflicts have been resolve, we can collapse branches where - // leaves have been pruned. - private RewriterTreeNode collapseBranches(RewriterTreeNode myroot) throws Exception { - - // NOTE: doing a depth first enumeration didn't wory when I started - // removing nodes halfway through. The following method does work, - // it's essentially a reverse breadth first traversal. - List nodes = new ArrayList(); - Enumeration bfe = myroot.breadthFirstEnumeration(); - - while (bfe.hasMoreElements()) { - RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); - nodes.add(node); - } - - // walk backwards - for (int i = nodes.size() - 1; i >= 0; i--) { - RewriterTreeNode node = nodes.get(i); - if (log.isDebugEnabled()) { - log.debug("collapseBranches, inspecting node: " + node.toString() + " " + node.printNode()); - } - - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { - if (node.getChildCount() == 0) { - node.removeFromParent(); - } else if (node.getChildCount() == 1) { - RewriterTreeNode p = (RewriterTreeNode) node.getParent(); - RewriterTreeNode c = (RewriterTreeNode) node.getFirstChild(); - node.removeFromParent(); - p.add(c); - - } - } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { - if (node.getChildCount() == 0) { - throw new Exception(); - } - } - } - return myroot; - } - - /** - * @param options - */ - public Multimap parseIndexedTerms(Map options) { - if (options.get(INDEXED_TERMS_LIST) != null) { - Multimap mmap = HashMultimap.create(); - String[] items = options.get(INDEXED_TERMS_LIST).split(";"); - for (String item : items) { - item = item.trim(); - if (log.isDebugEnabled()) {} - String[] parts = item.split(":"); - if (log.isDebugEnabled()) { - log.debug("adding: " + parts[0]); - } - for (int i = 2; i < parts.length; i++) { - // key is original query token, i.e. color:red - mmap.put(parts[0] + ":" + parts[1], parts[i]); - - } - - } - if (log.isDebugEnabled()) { - log.debug("multimap: " + mmap); - } - return mmap; - } - if (log.isDebugEnabled()) { - log.debug("parseIndexedTerms: returning null"); - } - return null; - } - - /** - * @param root - */ - public RewriterTreeNode refactorTree(RewriterTreeNode root) { - Enumeration dfe = root.breadthFirstEnumeration(); - - while (dfe.hasMoreElements()) { - RewriterTreeNode n = (RewriterTreeNode) dfe.nextElement(); - - if (n.getType() == ParserTreeConstants.JJTNOTNODE) {// BooleanLogicTreeNode.NodeType.NOT) { - RewriterTreeNode child = (RewriterTreeNode) n.getChildAt(0); - child.setNegated(true); - RewriterTreeNode parent = (RewriterTreeNode) n.getParent(); - parent.remove(n); - parent.add(child); - } - } - - // cycle through again and distribute nots - Enumeration bfe = root.breadthFirstEnumeration(); - RewriterTreeNode child; - - while (bfe.hasMoreElements()) { - child = (RewriterTreeNode) bfe.nextElement(); - - if (child.isNegated()) { - if (child.getChildCount() > 0) { - demorganSubTree(child); - - } - } - } - return root; - - } - - private void demorganSubTree(RewriterTreeNode root) { - - root.setNegated(false); - // root.setChildrenAllNegated(true); - - if (root.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) { - // root.setType(BooleanLogicTreeNode.NodeType.OR); - root.setType(ParserTreeConstants.JJTORNODE); - } else if (root.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { - // root.setType(BooleanLogicTreeNode.NodeType.AND); - root.setType(ParserTreeConstants.JJTANDNODE); - } else if (root.getType() == ParserTreeConstants.JJTEQNODE || root.getType() == ParserTreeConstants.JJTERNODE) { - // do nothing - } else { - log.error("refactorSubTree, node type not supported"); - } - - Enumeration children = root.children(); - RewriterTreeNode child = null; - // now distribute the negative - - while (children.hasMoreElements()) { - child = (RewriterTreeNode) children.nextElement(); - if (child.isNegated()) { - child.setNegated(false); - } else { - child.setNegated(true); - } - } - } - - private RewriterTreeNode applyCaseSensitivity(RewriterTreeNode root, boolean fnUpper, boolean fvUpper) { - // for each leaf, apply case sensitivity - Enumeration bfe = root.breadthFirstEnumeration(); - while (bfe.hasMoreElements()) { - RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); - if (node.isLeaf()) { - String fName = fnUpper ? node.getFieldName().toUpperCase() : node.getFieldName().toLowerCase(); - node.setFieldName(fName); - - String fValue = fvUpper ? node.getFieldValue().toUpperCase() : node.getFieldValue().toLowerCase(); - node.setFieldValue(fValue); - - } - } - return root; - } - - private Map getBoundedRangeMap(RewriterTreeNode node) { - - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { - Enumeration children = node.children(); - Map rangeMap = new HashMap(); - while (children.hasMoreElements()) { - RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - if (child.getType() == ParserTreeConstants.JJTLENODE || child.getType() == ParserTreeConstants.JJTLTNODE) { - Text fName = new Text(child.getFieldName()); - if (rangeMap.containsKey(fName)) { - RangeBounds rb = rangeMap.get(fName); - if (rb.getLower() != null) { - log.error("testBoundedRangeExistence, two lower bounds exist for bounded range."); - } - rb.setLower(new Text(child.getFieldValue())); - } else { - RangeBounds rb = new RangeBounds(); - rb.setLower(new Text(child.getFieldValue())); - rangeMap.put(new Text(child.getFieldName()), rb); - } - - } else if (child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTGTNODE) { - Text fName = new Text(child.getFieldName()); - if (rangeMap.containsKey(fName)) { - RangeBounds rb = rangeMap.get(fName); - if (rb.getUpper() != null) { - log.error("testBoundedRangeExistence, two Upper bounds exist for bounded range."); - } - rb.setUpper(new Text(child.getFieldValue())); - } else { - RangeBounds rb = new RangeBounds(); - rb.setUpper(new Text(child.getFieldValue())); - rangeMap.put(new Text(child.getFieldName()), rb); - } - } - } - - for (Entry entry : rangeMap.entrySet()) { - RangeBounds rb = entry.getValue(); - if (rb.getLower() == null || rb.getUpper() == null) { - // unbounded range, remove - if (log.isDebugEnabled()) { - log.debug("testBoundedRangeExistence: Unbounded Range detected, removing entry from rangeMap"); - } - rangeMap.remove(entry.getKey()); - } - } - if (!rangeMap.isEmpty()) { - return rangeMap; - } - } - - return null; - } - - /** - * INNER CLASSES - */ - public class RewriterTreeNode extends DefaultMutableTreeNode { - - private static final long serialVersionUID = 1L; - private boolean negated = false; - private String fieldName; - private String fieldValue; - private String operator; - private int type; - private boolean removal = false; - - /** - * - * @param type - */ - public RewriterTreeNode(int type) { - super(); - this.type = type; - } - - /** - * - * @param type - * @param fName - * @param fValue - */ - public RewriterTreeNode(int type, String fName, String fValue) { - super(); - init(type, fName, fValue); - } - - /** - * - * @param type - * @param fName - * @param fValue - * @param negate - */ - public RewriterTreeNode(int type, String fName, String fValue, boolean negate) { - super(); - init(type, fName, fValue, negate); - } - - private void init(int type, String fName, String fValue) { - init(type, fName, fValue, false); - } - - private void init(int type, String fName, String fValue, boolean negate) { - this.type = type; - this.fieldName = fName; - this.fieldValue = fValue; - this.negated = negate; - this.operator = JexlOperatorConstants.getOperator(type); - if (log.isDebugEnabled()) { - log.debug("FN: " + this.fieldName + " FV: " + this.fieldValue + " Op: " + this.operator); - } - } - - /** - * @return The field name. - */ - public String getFieldName() { - return fieldName; - } - - /** - * - * @param fieldName - */ - public void setFieldName(String fieldName) { - this.fieldName = fieldName; - } - - /** - * - * @return The field value. - */ - public String getFieldValue() { - return fieldValue; - } - - /** - * - * @param fieldValue - */ - public void setFieldValue(String fieldValue) { - this.fieldValue = fieldValue; - } - - /** - * - * @return true if negated, otherwise false. - */ - public boolean isNegated() { - return negated; - } - - /** - * - * @param negated - */ - public void setNegated(boolean negated) { - this.negated = negated; - } - - /** - * - * @return The operator. - */ - public String getOperator() { - return operator; - } - - /** - * - * @param operator - */ - public void setOperator(String operator) { - this.operator = operator; - } - - /** - * - * @return The type. - */ - public int getType() { - return type; - } - - /** - * - * @param type - */ - public void setType(int type) { - this.type = type; - } - - public boolean isRemoval() { - return removal; - } - - public void setRemoval(boolean removal) { - this.removal = removal; - } - - public String getContents() { - StringBuilder s = new StringBuilder("["); - s.append(toString()); - - if (children != null) { - Enumeration e = this.children(); - while (e.hasMoreElements()) { - RewriterTreeNode n = (RewriterTreeNode) e.nextElement(); - s.append(","); - s.append(n.getContents()); - } - } - s.append("]"); - return s.toString(); - } - - /** - * - * @return A string represenation of the field name and value. - */ - public String printNode() { - StringBuilder s = new StringBuilder("["); - s.append("Full Location & Term = "); - if (this.fieldName != null) { - s.append(this.fieldName.toString()); - } else { - s.append("BlankDataLocation"); - } - s.append(" "); - if (this.fieldValue != null) { - s.append(this.fieldValue.toString()); - } else { - s.append("BlankTerm"); - } - s.append("]"); - return s.toString(); - } - - @Override - public String toString() { - switch (type) { - case ParserTreeConstants.JJTEQNODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTNENODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTERNODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTNRNODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTLENODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTLTNODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTGENODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTGTNODE: - return fieldName + ":" + fieldValue + ":negated=" + isNegated(); - case ParserTreeConstants.JJTJEXLSCRIPT: - return "HEAD"; - case ParserTreeConstants.JJTANDNODE: - return "AND"; - case ParserTreeConstants.JJTNOTNODE: - return "NOT"; - case ParserTreeConstants.JJTORNODE: - return "OR"; - default: - System.out.println("Problem in NuwaveTreeNode.toString()"); - return null; - } - } - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/JexlOperatorConstants.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/JexlOperatorConstants.java b/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/JexlOperatorConstants.java deleted file mode 100644 index 29898a3..0000000 --- a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/JexlOperatorConstants.java +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.parser; - -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import org.apache.commons.jexl2.parser.ASTAndNode; - -import org.apache.commons.jexl2.parser.ASTEQNode; -import org.apache.commons.jexl2.parser.ASTERNode; -import org.apache.commons.jexl2.parser.ASTFunctionNode; -import org.apache.commons.jexl2.parser.ASTGENode; -import org.apache.commons.jexl2.parser.ASTGTNode; -import org.apache.commons.jexl2.parser.ASTLENode; -import org.apache.commons.jexl2.parser.ASTLTNode; -import org.apache.commons.jexl2.parser.ASTNENode; -import org.apache.commons.jexl2.parser.ASTNRNode; -import org.apache.commons.jexl2.parser.ASTOrNode; -import org.apache.commons.jexl2.parser.JexlNode; -import org.apache.commons.jexl2.parser.ParserTreeConstants; - -public class JexlOperatorConstants implements ParserTreeConstants { - - private static Map,String> operatorMap = new ConcurrentHashMap,String>(); - private static Map> classMap = new ConcurrentHashMap>(); - private static Map jjtOperatorMap = new ConcurrentHashMap(); - private static Map jjtTypeMap = new ConcurrentHashMap(); - - static { - operatorMap.put(ASTEQNode.class, "=="); - operatorMap.put(ASTNENode.class, "!="); - operatorMap.put(ASTLTNode.class, "<"); - operatorMap.put(ASTLENode.class, "<="); - operatorMap.put(ASTGTNode.class, ">"); - operatorMap.put(ASTGENode.class, ">="); - operatorMap.put(ASTERNode.class, "=~"); - operatorMap.put(ASTNRNode.class, "!~"); - operatorMap.put(ASTFunctionNode.class, "f"); - operatorMap.put(ASTAndNode.class, "and"); - operatorMap.put(ASTOrNode.class, "or"); - - classMap.put("==", ASTEQNode.class); - classMap.put("!=", ASTNENode.class); - classMap.put("<", ASTLTNode.class); - classMap.put("<=", ASTLENode.class); - classMap.put(">", ASTGTNode.class); - classMap.put(">=", ASTGENode.class); - classMap.put("=~", ASTERNode.class); - classMap.put("!~", ASTNRNode.class); - classMap.put("f", ASTFunctionNode.class); - - jjtOperatorMap.put(JJTEQNODE, "=="); - jjtOperatorMap.put(JJTNENODE, "!="); - jjtOperatorMap.put(JJTLTNODE, "<"); - jjtOperatorMap.put(JJTLENODE, "<="); - jjtOperatorMap.put(JJTGTNODE, ">"); - jjtOperatorMap.put(JJTGENODE, ">="); - jjtOperatorMap.put(JJTERNODE, "=~"); - jjtOperatorMap.put(JJTNRNODE, "!~"); - jjtOperatorMap.put(JJTFUNCTIONNODE, "f"); - jjtOperatorMap.put(JJTANDNODE, "and"); - jjtOperatorMap.put(JJTORNODE, "or"); - - jjtTypeMap.put("==", JJTEQNODE); - jjtTypeMap.put("!=", JJTNENODE); - jjtTypeMap.put("<", JJTLTNODE); - jjtTypeMap.put("<=", JJTLENODE); - jjtTypeMap.put(">", JJTGTNODE); - jjtTypeMap.put(">=", JJTGENODE); - jjtTypeMap.put("=~", JJTERNODE); - jjtTypeMap.put("!~", JJTNRNODE); - jjtTypeMap.put("f", JJTFUNCTIONNODE); - - } - - public static String getOperator(Class nodeType) { - return operatorMap.get(nodeType); - } - - public static String getOperator(Integer jjtNode) { - return jjtOperatorMap.get(jjtNode); - } - - public static Class getClass(String operator) { - return classMap.get(operator); - } - - public static int getJJTNodeType(String operator) { - return jjtTypeMap.get(operator); - } -} http://git-wip-us.apache.org/repos/asf/accumulo/blob/8db62992/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java ---------------------------------------------------------------------- diff --git a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java b/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java deleted file mode 100644 index aaac3d8..0000000 --- a/src/examples/wikisearch/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java +++ /dev/null @@ -1,291 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.accumulo.examples.wikisearch.parser; - -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Set; - - -import org.apache.accumulo.examples.wikisearch.function.QueryFunctions; -import org.apache.accumulo.examples.wikisearch.jexl.Arithmetic; -import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue; -import org.apache.accumulo.examples.wikisearch.parser.QueryParser.QueryTerm; -import org.apache.commons.jexl2.Expression; -import org.apache.commons.jexl2.JexlContext; -import org.apache.commons.jexl2.JexlEngine; -import org.apache.commons.jexl2.MapContext; -import org.apache.commons.jexl2.Script; -import org.apache.commons.jexl2.parser.ParseException; -import org.apache.commons.jexl2.parser.ParserTreeConstants; -import org.apache.log4j.Level; -import org.apache.log4j.Logger; - - -import com.google.common.collect.Multimap; - - -/** - * This class evaluates events against a query. The query is passed to the constructor and then parsed. It is evaluated against an event in the evaluate method. - */ -public class QueryEvaluator { - - private static Logger log = Logger.getLogger(QueryEvaluator.class); - // According to the JEXL 2.0 docs, the engine is thread-safe. Let's create 1 engine per VM and - // cache 128 expressions - private static JexlEngine engine = new JexlEngine(null, new Arithmetic(false), null, null); - - static { - engine.setSilent(false); - engine.setCache(128); - Map functions = new HashMap(); - functions.put("f", QueryFunctions.class); - engine.setFunctions(functions); - } - private String query = null; - private Set literals = null; - private Multimap terms = null; - private String modifiedQuery = null; - private JexlContext ctx = new MapContext(); - private boolean caseInsensitive = true; - - public QueryEvaluator(String query) throws ParseException { - this.caseInsensitive = true; // default case insensitive matching. - if (caseInsensitive) { - query = query.toLowerCase(); - } - this.query = query; - QueryParser parser = new QueryParser(); - parser.execute(query); - this.terms = parser.getQueryTerms(); - if (caseInsensitive) { - literals = new HashSet(); - for (String lit : parser.getQueryIdentifiers()) { - literals.add(lit.toLowerCase()); - } - } else { - this.literals = parser.getQueryIdentifiers(); - } - } - - public QueryEvaluator(String query, boolean insensitive) throws ParseException { - this.caseInsensitive = insensitive; - if (this.caseInsensitive) { - query = query.toLowerCase(); - } - this.query = query; - QueryParser parser = new QueryParser(); - parser.execute(query); - this.terms = parser.getQueryTerms(); - - if (caseInsensitive) { - literals = new HashSet(); - for (String lit : parser.getQueryIdentifiers()) { - literals.add(lit.toLowerCase()); - } - } else { - this.literals = parser.getQueryIdentifiers(); - } - } - - public String getQuery() { - return this.query; - } - - public void printLiterals() { - for (String s : literals) { - System.out.println("literal: " + s); - } - } - - public void setLevel(Level lev) { - log.setLevel(lev); - } - - public StringBuilder rewriteQuery(StringBuilder query, String fieldName, Collection fieldValues) { - if (log.isDebugEnabled()) { - log.debug("rewriteQuery"); - } - // Here we have a field that has multiple values. In this case we need to put - // all values into the jexl context as an array and rewrite the query to account for all - // of the fields. - if (caseInsensitive) { - fieldName = fieldName.toLowerCase(); - } - if (log.isDebugEnabled()) { - log.debug("Modifying original query: " + query); - } - // Pull the values out of the FieldValue object - String[] values = new String[fieldValues.size()]; - int idx = 0; - for (FieldValue fv : fieldValues) { - if (caseInsensitive) { - values[idx] = (new String(fv.getValue())).toLowerCase(); - } else { - values[idx] = new String(fv.getValue()); - } - idx++; - } - // Add the array to the context - ctx.set(fieldName, values); - - Collection qt = terms.get(fieldName); - - // Add a script to the beginning of the query for this multi-valued field - StringBuilder script = new StringBuilder(); - script.append("_").append(fieldName).append(" = false;\n"); - script.append("for (field : ").append(fieldName).append(") {\n"); - - for (QueryTerm t : qt) { - if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { - script.append("\tif (_").append(fieldName).append(" == false && field ").append(t.getOperator()).append(" ").append(t.getValue()).append(") { \n"); - } else { - script.append("\tif (_").append(fieldName).append(" == false && ").append(t.getValue().toString().replace(fieldName, "field")).append(") { \n"); - } - script.append("\t\t_").append(fieldName).append(" = true;\n"); - script.append("\t}\n"); - } - script.append("}\n"); - - // Add the script to the beginning of the query - query.insert(0, script.toString()); - - StringBuilder newPredicate = new StringBuilder(); - newPredicate.append("_").append(fieldName).append(" == true"); - - for (QueryTerm t : qt) { - // Find the location of this term in the query - StringBuilder predicate = new StringBuilder(); - int start = 0; - if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { - predicate.append(fieldName).append(" ").append(t.getOperator()).append(" ").append(t.getValue()); - start = query.indexOf(predicate.toString()); - } else { - predicate.append(t.getValue().toString()); - // need to find the second occurence of the string. - start = query.indexOf(predicate.toString()); - } - if (-1 == start) { - log.warn("Unable to find predicate: " + predicate.toString() + " in rewritten query: " + query.toString()); - } - int length = predicate.length(); - - // Now modify the query to check the value of my.fieldName - query.replace(start, start + length, newPredicate.toString()); - } - - if (log.isDebugEnabled()) { - log.debug("leaving rewriteQuery with: " + query.toString()); - } - return query; - } - - /** - * Evaluates the query against an event. - * - * @param eventFields - */ - public boolean evaluate(EventFields eventFields) { - - this.modifiedQuery = null; - boolean rewritten = false; - - // Copy the query - StringBuilder q = new StringBuilder(query); - // Copy the literals, we are going to remove elements from this set - // when they are added to the JEXL context. This will allow us to - // determine which items in the query where *NOT* in the data. - HashSet literalsCopy = new HashSet(literals); - - // Loop through the event fields and add them to the JexlContext. - for (Entry> field : eventFields.asMap().entrySet()) { - String fName = field.getKey(); - if (caseInsensitive) { - fName = fName.toLowerCase(); - } - // If this field is not part of the expression, then skip it. - if (!literals.contains(fName)) { - continue; - } else { - literalsCopy.remove(fName); - } - - // This field may have multiple values. - if (field.getValue().size() == 0) { - continue; - } else if (field.getValue().size() == 1) { - // We are explicitly converting these bytes to a String. - if (caseInsensitive) { - ctx.set(field.getKey().toLowerCase(), (new String(field.getValue().iterator().next().getValue())).toLowerCase()); - } else { - ctx.set(field.getKey(), new String(field.getValue().iterator().next().getValue())); - } - - } else { - // q = queryRewrite(q, field.getKey(), field.getValue()); - q = rewriteQuery(q, field.getKey(), field.getValue()); - rewritten = true; - }// End of if - - }// End of loop - - // For any literals in the query that were not found in the data, add them to the context - // with a null value. - for (String lit : literalsCopy) { - ctx.set(lit, null); - } - - if (log.isDebugEnabled()) { - log.debug("Evaluating query: " + q.toString()); - } - - this.modifiedQuery = q.toString(); - - Boolean result = null; - if (rewritten) { - Script script = engine.createScript(this.modifiedQuery); - try { - result = (Boolean) script.execute(ctx); - } catch (Exception e) { - log.error("Error evaluating script: " + this.modifiedQuery + " against event" + eventFields.toString(), e); - } - } else { - Expression expr = engine.createExpression(this.modifiedQuery); - try { - result = (Boolean) expr.evaluate(ctx); - } catch (Exception e) { - log.error("Error evaluating expression: " + this.modifiedQuery + " against event" + eventFields.toString(), e); - } - } - if (null != result && result) { - return true; - } else { - return false; - } - } // End of method - - /** - * - * @return rewritten query that was evaluated against the most recent event - */ - public String getModifiedQuery() { - return this.modifiedQuery; - } -}