From: ctubbsii@apache.org
To: commits@accumulo.apache.org
Date: Wed, 05 Jun 2019 20:59:41 +0000 (UTC)
Subject: [accumulo-wikisearch] branch master updated: Build against Accumulo 2.0.0-alpha-2

This is an automated email from the ASF dual-hosted git repository.
ctubbsii pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/accumulo-wikisearch.git


The following commit(s) were added to refs/heads/master by this push:
     new 7667f22  Build against Accumulo 2.0.0-alpha-2
7667f22 is described below

commit 7667f22431e71d23c6e8b0b79787c39a29f252f0
Author: Christopher Tubbs
AuthorDate: Wed Jun 5 16:59:06 2019 -0400

    Build against Accumulo 2.0.0-alpha-2
---
 NOTICE                                             |   2 +-
 ingest/pom.xml                                     |   3 +-
 .../wikisearch/ingest/WikipediaConfiguration.java  |  89 +--
 .../wikisearch/ingest/WikipediaIngester.java       | 101 +--
 .../wikisearch/ingest/WikipediaMapper.java         | 110 ++--
 .../ingest/WikipediaPartitionedIngester.java       | 172 +++---
 .../wikisearch/reader/AggregatingRecordReader.java |  66 +-
 .../examples/wikisearch/reader/LfLineReader.java   |  67 +-
 .../examples/wikisearch/util/TextUtil.java         |  32 +-
 .../wikisearch/ingest/WikipediaInputSplitTest.java |  13 +-
 .../wikisearch/iterator/TextIndexTest.java         |  81 ++-
 pom.xml                                            | 201 +++---
 query-war/pom.xml                                  |  12 +-
 query/pom.xml                                      |  29 +-
 .../iterator/AbstractEvaluatingIterator.java       | 180 +++---
 .../examples/wikisearch/iterator/AndIterator.java  | 422 +++++++------
 .../wikisearch/iterator/BooleanLogicIterator.java  | 674 ++++++++++++---------
 .../iterator/DefaultIteratorEnvironment.java       |  21 +-
 .../wikisearch/iterator/EvaluatingIterator.java    |  40 +-
 .../examples/wikisearch/iterator/OrIterator.java   | 378 ++++++------
 .../wikisearch/logic/AbstractQueryLogic.java       | 411 +++++++------
 .../wikisearch/parser/FieldIndexQueryReWriter.java | 464 +++++++-------
 .../examples/wikisearch/parser/QueryEvaluator.java | 110 ++--
 .../wikisearch/parser/RangeCalculator.java         | 640 +++++++++++--------
 .../examples/wikisearch/parser/TreeBuilder.java    | 278 +++++----
 .../accumulo/examples/wikisearch/query/Query.java  | 114 ++--
 .../examples/wikisearch/util/BaseKeyParser.java    |  33 +-
 27 files changed, 2566 insertions(+), 2177 deletions(-)

diff --git a/NOTICE b/NOTICE
index 8a2d875..b58fdab 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,5 +1,5 @@
 Apache Accumulo Wikisearch
-Copyright 2011-2017 The Apache Software Foundation
+Copyright 2011-2019 The Apache Software Foundation
 
 This product includes software developed at
 The Apache Software Foundation (http://www.apache.org/).
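Beyond whitespace reformatting, the recurring functional change in the hunks that follow is connection setup: both ingest drivers move from new ClientConfiguration() to the ClientConfiguration.create() factory when wiring up AccumuloOutputFormat. A minimal, self-contained sketch of that pattern follows; the class and method names here are illustrative only, while the Accumulo calls are the ones that appear in the diff (in the real jobs the instance name, zookeepers, user, and password come from WikipediaConfiguration):

    import org.apache.accumulo.core.client.ClientConfiguration;
    import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty;
    import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat;
    import org.apache.accumulo.core.client.security.tokens.PasswordToken;
    import org.apache.hadoop.mapreduce.Job;

    class ClientConfigSketch {
      // Illustrative helper; not part of the repository.
      static void configureAccumuloOutput(Job job, String instanceName, String zookeepers,
          String user, byte[] password) throws Exception {
        // Factory replaces the deprecated 'new ClientConfiguration()' used before this commit
        ClientConfiguration clientConfig = ClientConfiguration.create();
        clientConfig.setProperty(ClientProperty.INSTANCE_NAME, instanceName);
        clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, zookeepers);

        job.setOutputFormatClass(AccumuloOutputFormat.class);
        AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password));
        AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig);
      }
    }
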
diff --git a/ingest/pom.xml b/ingest/pom.xml index 426cff2..a317f5b 100644 --- a/ingest/pom.xml +++ b/ingest/pom.xml @@ -20,7 +20,7 @@ org.apache.accumulo accumulo-wikisearch - 1.8.0 + 2.0.0-SNAPSHOT wikisearch-ingest wikisearch-ingest @@ -102,7 +102,6 @@ src/assembly/dist.xml - gnu diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java index 27a28a1..44a3fbc 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaConfiguration.java @@ -35,21 +35,21 @@ public class WikipediaConfiguration { public final static String USER = "wikipedia.accumulo.user"; public final static String PASSWORD = "wikipedia.accumulo.password"; public final static String TABLE_NAME = "wikipedia.accumulo.table"; - + public final static String ZOOKEEPERS = "wikipedia.accumulo.zookeepers"; - + public final static String NAMESPACES_FILENAME = "wikipedia.namespaces.filename"; public final static String LANGUAGES_FILENAME = "wikipedia.languages.filename"; public final static String WORKING_DIRECTORY = "wikipedia.ingest.working"; - + public final static String ANALYZER = "wikipedia.index.analyzer"; - + public final static String NUM_PARTITIONS = "wikipedia.ingest.partitions"; public final static String NUM_GROUPS = "wikipedia.ingest.groups"; public final static String PARTITIONED_ARTICLES_DIRECTORY = "wikipedia.partitioned.directory"; - + public final static String RUN_PARTITIONER = "wikipedia.run.partitioner"; public final static String RUN_INGEST = "wikipedia.run.ingest"; public final static String BULK_INGEST = "wikipedia.bulk.ingest"; @@ -57,12 +57,11 @@ public class WikipediaConfiguration { public final static String BULK_INGEST_FAILURE_DIR = "wikipedia.bulk.ingest.failure.dir"; public final static String BULK_INGEST_BUFFER_SIZE = "wikipedia.bulk.ingest.buffer.size"; public final static String PARTITIONED_INPUT_MIN_SPLIT_SIZE = "wikipedia.min.input.split.size"; - - + public static String getUser(Configuration conf) { return conf.get(USER); - }; - + } + public static byte[] getPassword(Configuration conf) { String pass = conf.get(PASSWORD); if (pass == null) { @@ -70,7 +69,7 @@ public class WikipediaConfiguration { } return pass.getBytes(); } - + public static String getTableName(Configuration conf) { String tablename = conf.get(TABLE_NAME); if (tablename == null) { @@ -78,11 +77,11 @@ public class WikipediaConfiguration { } return tablename; } - + public static String getInstanceName(Configuration conf) { return conf.get(INSTANCE_NAME); } - + public static String getZookeepers(Configuration conf) { String zookeepers = conf.get(ZOOKEEPERS); if (zookeepers == null) { @@ -90,47 +89,51 @@ public class WikipediaConfiguration { } return zookeepers; } - + public static Path getNamespacesFile(Configuration conf) { - String filename = conf.get(NAMESPACES_FILENAME, new Path(getWorkingDirectory(conf), "namespaces.dat").toString()); + String filename = conf.get(NAMESPACES_FILENAME, + new Path(getWorkingDirectory(conf), "namespaces.dat").toString()); return new Path(filename); } - + public static Path getLanguagesFile(Configuration conf) { - String filename = conf.get(LANGUAGES_FILENAME, new Path(getWorkingDirectory(conf), "languages.txt").toString()); + String filename = conf.get(LANGUAGES_FILENAME, + new Path(getWorkingDirectory(conf), 
"languages.txt").toString()); return new Path(filename); } - + public static Path getWorkingDirectory(Configuration conf) { String filename = conf.get(WORKING_DIRECTORY); return new Path(filename); } - + public static Analyzer getAnalyzer(Configuration conf) throws IOException { - Class analyzerClass = conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class); + Class analyzerClass = + conf.getClass(ANALYZER, SimpleAnalyzer.class, Analyzer.class); return ReflectionUtils.newInstance(analyzerClass, conf); } - - public static Connector getConnector(Configuration conf) throws AccumuloException, AccumuloSecurityException { + + public static Connector getConnector(Configuration conf) + throws AccumuloException, AccumuloSecurityException { return getInstance(conf).getConnector(getUser(conf), getPassword(conf)); } - + public static Instance getInstance(Configuration conf) { return new ZooKeeperInstance(getInstanceName(conf), getZookeepers(conf)); } - + public static int getNumPartitions(Configuration conf) { return conf.getInt(NUM_PARTITIONS, 25); } - + public static int getNumGroups(Configuration conf) { return conf.getInt(NUM_GROUPS, 1); } - + public static Path getPartitionedArticlesPath(Configuration conf) { return new Path(conf.get(PARTITIONED_ARTICLES_DIRECTORY)); } - + public static long getMinInputSplitSize(Configuration conf) { return conf.getLong(PARTITIONED_INPUT_MIN_SPLIT_SIZE, 1l << 27); } @@ -154,18 +157,14 @@ public class WikipediaConfiguration { public static String bulkIngestFailureDir(Configuration conf) { return conf.get(BULK_INGEST_FAILURE_DIR); } - + public static long bulkIngestBufferSize(Configuration conf) { - return conf.getLong(BULK_INGEST_BUFFER_SIZE,1l<<28); + return conf.getLong(BULK_INGEST_BUFFER_SIZE, 1l << 28); } /** * Helper method to get properties from Hadoop configuration - * - * @param - * @param conf - * @param propertyName - * @param resultClass + * * @throws IllegalArgumentException * if property is not defined, null, or empty. Or if resultClass is not handled. 
* @return value of property @@ -173,26 +172,28 @@ public class WikipediaConfiguration { @SuppressWarnings("unchecked") public static T isNull(Configuration conf, String propertyName, Class resultClass) { String p = conf.get(propertyName); - if (StringUtils.isEmpty(p)) + if (StringUtils.isEmpty(p)) { throw new IllegalArgumentException(propertyName + " must be specified"); - - if (resultClass.equals(String.class)) + } + + if (resultClass.equals(String.class)) { return (T) p; - else if (resultClass.equals(String[].class)) + } else if (resultClass.equals(String[].class)) { return (T) conf.getStrings(propertyName); - else if (resultClass.equals(Boolean.class)) + } else if (resultClass.equals(Boolean.class)) { return (T) Boolean.valueOf(p); - else if (resultClass.equals(Long.class)) + } else if (resultClass.equals(Long.class)) { return (T) Long.valueOf(p); - else if (resultClass.equals(Integer.class)) + } else if (resultClass.equals(Integer.class)) { return (T) Integer.valueOf(p); - else if (resultClass.equals(Float.class)) + } else if (resultClass.equals(Float.class)) { return (T) Float.valueOf(p); - else if (resultClass.equals(Double.class)) + } else if (resultClass.equals(Double.class)) { return (T) Double.valueOf(p); - else + } else { throw new IllegalArgumentException(resultClass.getSimpleName() + " is unhandled."); - + } + } } diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java index 1a495ed..4be65d5 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaIngester.java @@ -30,12 +30,12 @@ import java.util.regex.Pattern; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.ClientConfiguration; +import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.IteratorSetting.Column; import org.apache.accumulo.core.client.TableExistsException; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty; import org.apache.accumulo.core.client.admin.TableOperations; import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat; import org.apache.accumulo.core.client.security.tokens.PasswordToken; @@ -59,68 +59,73 @@ import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; public class WikipediaIngester extends Configured implements Tool { - + public final static String INGEST_LANGUAGE = "wikipedia.ingest_language"; public final static String SPLIT_FILE = "wikipedia.split_file"; public final static String TABLE_NAME = "wikipedia.table"; - + public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new WikipediaIngester(), args); System.exit(res); } - - public static void createTables(TableOperations tops, String tableName, boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException, + + public static void createTables(TableOperations tops, String tableName, + boolean configureLocalityGroups) throws AccumuloException, AccumuloSecurityException, TableNotFoundException, TableExistsException { 
// Create the shard table String indexTableName = tableName + "Index"; String reverseIndexTableName = tableName + "ReverseIndex"; String metadataTableName = tableName + "Metadata"; - + // create the shard table if (!tops.exists(tableName)) { - // Set a text index combiner on the given field names. No combiner is set if the option is not supplied + // Set a text index combiner on the given field names. No combiner is set if the option is not + // supplied String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME; - + tops.create(tableName); if (textIndexFamilies.length() > 0) { System.out.println("Adding content combiner on the fields: " + textIndexFamilies); - + IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class); - List columns = new ArrayList(); + List columns = new ArrayList<>(); for (String family : StringUtils.split(textIndexFamilies, ',')) { columns.add(new Column("fi\0" + family)); } TextIndexCombiner.setColumns(setting, columns); TextIndexCombiner.setLossyness(setting, true); - + tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class)); } - + // Set the locality group for the full content column family - if (configureLocalityGroups) - tops.setLocalityGroups(tableName, - Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY)))); - + if (configureLocalityGroups) { + tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", + Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY)))); + } + } - + if (!tops.exists(indexTableName)) { tops.create(indexTableName); // Add the UID combiner - IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); + IteratorSetting setting = + new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class)); } - + if (!tops.exists(reverseIndexTableName)) { tops.create(reverseIndexTableName); // Add the UID combiner - IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); + IteratorSetting setting = + new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class)); } - + if (!tops.exists(metadataTableName)) { // Add the SummingCombiner with VARLEN encoding for the frequency column tops.create(metadataTableName); @@ -130,42 +135,44 @@ public class WikipediaIngester extends Configured implements Tool { tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class)); } } - + @Override public int run(String[] args) throws Exception { Job job = new Job(getConf(), "Ingest Wikipedia"); Configuration conf = job.getConfiguration(); conf.set("mapred.map.tasks.speculative.execution", "false"); - + String tablename = WikipediaConfiguration.getTableName(conf); - ClientConfiguration clientConfig = new ClientConfiguration(); - clientConfig.setProperty(ClientProperty.INSTANCE_NAME, WikipediaConfiguration.getInstanceName(conf)); - clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, WikipediaConfiguration.getZookeepers(conf)); - + ClientConfiguration clientConfig = ClientConfiguration.create(); + 
clientConfig.setProperty(ClientProperty.INSTANCE_NAME, + WikipediaConfiguration.getInstanceName(conf)); + clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, + WikipediaConfiguration.getZookeepers(conf)); + String user = WikipediaConfiguration.getUser(conf); byte[] password = WikipediaConfiguration.getPassword(conf); Connector connector = WikipediaConfiguration.getConnector(conf); - + TableOperations tops = connector.tableOperations(); - + createTables(tops, tablename, true); - + configureJob(job); - - List inputPaths = new ArrayList(); - SortedSet languages = new TreeSet(); + + List inputPaths = new ArrayList<>(); + SortedSet languages = new TreeSet<>(); FileSystem fs = FileSystem.get(conf); Path parent = new Path(conf.get("wikipedia.input")); listFiles(parent, fs, inputPaths, languages); - + System.out.println("Input files in " + parent + ":" + inputPaths.size()); Path[] inputPathsArray = new Path[inputPaths.size()]; inputPaths.toArray(inputPathsArray); - + System.out.println("Languages:" + languages.size()); - + FileInputFormat.setInputPaths(job, inputPathsArray); - + job.setMapperClass(WikipediaMapper.class); job.setNumReduceTasks(0); job.setMapOutputKeyClass(Text.class); @@ -173,17 +180,12 @@ public class WikipediaIngester extends Configured implements Tool { job.setOutputFormatClass(AccumuloOutputFormat.class); AccumuloOutputFormat.setConnectorInfo(job, user, new PasswordToken(password)); AccumuloOutputFormat.setZooKeeperInstance(job, clientConfig); - + return job.waitForCompletion(true) ? 0 : 1; } - - public final static PathFilter partFilter = new PathFilter() { - @Override - public boolean accept(Path path) { - return path.getName().startsWith("part"); - }; - }; - + + public final static PathFilter partFilter = path -> path.getName().startsWith("part"); + protected void configureJob(Job job) { Configuration conf = job.getConfiguration(); job.setJarByClass(WikipediaIngester.class); @@ -191,10 +193,11 @@ public class WikipediaIngester extends Configured implements Tool { conf.set(AggregatingRecordReader.START_TOKEN, ""); conf.set(AggregatingRecordReader.END_TOKEN, ""); } - + protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?"); - - protected void listFiles(Path path, FileSystem fs, List files, Set languages) throws IOException { + + protected void listFiles(Path path, FileSystem fs, List files, Set languages) + throws IOException { for (FileStatus status : fs.listStatus(path)) { if (status.isDir()) { listFiles(status.getPath(), fs, files, languages); diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java index 8565b09..c751637 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaMapper.java @@ -15,11 +15,10 @@ * limitations under the License. 
*/ /** - * + * */ package org.apache.accumulo.examples.wikisearch.ingest; - import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; @@ -55,19 +54,19 @@ import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; public class WikipediaMapper extends Mapper { - + private static final Logger log = Logger.getLogger(WikipediaMapper.class); - + public final static Charset UTF8 = Charset.forName("UTF-8"); public static final String DOCUMENT_COLUMN_FAMILY = "d"; public static final String METADATA_EVENT_COLUMN_FAMILY = "e"; public static final String METADATA_INDEX_COLUMN_FAMILY = "i"; public static final String TOKENS_FIELD_NAME = "TEXT"; - + private final static Pattern languagePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?"); private static final Value NULL_VALUE = new Value(new byte[0]); private static final String cvPrefix = "all|"; - + private ArticleExtractor extractor; private String language; private int numPartitions = 0; @@ -75,12 +74,12 @@ public class WikipediaMapper extends Mapper { private int myGroup = -1; private int numGroups = -1; - + private Text tablename = null; private Text indexTableName = null; private Text reverseIndexTableName = null; private Text metadataTableName = null; - + @Override public void setup(Context context) { Configuration conf = context.getConfiguration(); @@ -88,11 +87,11 @@ public class WikipediaMapper extends Mapper { indexTableName = new Text(tablename + "Index"); reverseIndexTableName = new Text(tablename + "ReverseIndex"); metadataTableName = new Text(tablename + "Metadata"); - - WikipediaInputSplit wiSplit = (WikipediaInputSplit)context.getInputSplit(); + + WikipediaInputSplit wiSplit = (WikipediaInputSplit) context.getInputSplit(); myGroup = wiSplit.getPartition(); numGroups = WikipediaConfiguration.getNumGroups(conf); - + FileSplit split = wiSplit.getFileSplit(); String fileName = split.getPath().getName(); Matcher matcher = languagePattern.matcher(fileName); @@ -104,40 +103,44 @@ public class WikipediaMapper extends Mapper { extractor = new ArticleExtractor(); numPartitions = WikipediaConfiguration.getNumPartitions(conf); cv = new ColumnVisibility(cvPrefix + language); - + } - + /** * We will partition the documents based on the document id - * - * @param article - * @param numPartitions + * * @return The number of the partition for a given article. 
- * @throws IllegalFormatException */ - public static int getPartitionId(Article article, int numPartitions) throws IllegalFormatException { + public static int getPartitionId(Article article, int numPartitions) + throws IllegalFormatException { return article.getId() % numPartitions; } - - static HashSet metadataSent = new HashSet(); + + static HashSet metadataSent = new HashSet<>(); @Override - protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - Article article = extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8)); + protected void map(LongWritable key, Text value, Context context) + throws IOException, InterruptedException { + Article article = + extractor.extract(new InputStreamReader(new ByteArrayInputStream(value.getBytes()), UTF8)); String NULL_BYTE = "\u0000"; String colfPrefix = language + NULL_BYTE; String indexPrefix = "fi" + NULL_BYTE; if (article != null) { int groupId = WikipediaMapper.getPartitionId(article, numGroups); - if(groupId != myGroup) + if (groupId != myGroup) { return; - Text partitionId = new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions))); - + } + Text partitionId = + new Text(Integer.toString(WikipediaMapper.getPartitionId(article, numPartitions))); + // Create the mutations for the document. // Row is partition id, colf is language0articleid, colq is fieldName\0fieldValue Mutation m = new Mutation(partitionId); for (Entry entry : article.getFieldValues().entrySet()) { - m.put(colfPrefix + article.getId(), entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(), NULL_VALUE); + m.put(colfPrefix + article.getId(), + entry.getKey() + NULL_BYTE + entry.getValue().toString(), cv, article.getTimestamp(), + NULL_VALUE); // Create mutations for the metadata table. String metadataKey = entry.getKey() + METADATA_EVENT_COLUMN_FAMILY + language; if (!metadataSent.contains(metadataKey)) { @@ -147,26 +150,30 @@ public class WikipediaMapper extends Mapper { metadataSent.add(metadataKey); } } - + // Tokenize the content Set tokens = getTokens(article); - + // We are going to put the fields to be indexed into a multimap. This allows us to iterate // over the entire set once. 
Multimap indexFields = HashMultimap.create(); // Add the normalized field values LcNoDiacriticsNormalizer normalizer = new LcNoDiacriticsNormalizer(); - for (Entry index : article.getNormalizedFieldValues().entrySet()) + for (Entry index : article.getNormalizedFieldValues().entrySet()) { indexFields.put(index.getKey(), index.getValue()); + } // Add the tokens - for (String token : tokens) + for (String token : tokens) { indexFields.put(TOKENS_FIELD_NAME, normalizer.normalizeFieldValue("", token)); - + } + for (Entry index : indexFields.entries()) { // Create mutations for the in partition index // Row is partition id, colf is 'fi'\0fieldName, colq is fieldValue\0language\0article id - m.put(indexPrefix + index.getKey(), index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv, article.getTimestamp(), NULL_VALUE); - + m.put(indexPrefix + index.getKey(), + index.getValue() + NULL_BYTE + colfPrefix + article.getId(), cv, article.getTimestamp(), + NULL_VALUE); + // Create mutations for the global index // Create a UID object for the Value Builder uidBuilder = Uid.List.newBuilder(); @@ -175,54 +182,57 @@ public class WikipediaMapper extends Mapper { uidBuilder.addUID(Integer.toString(article.getId())); Uid.List uidList = uidBuilder.build(); Value val = new Value(uidList.toByteArray()); - + // Create mutations for the global index - // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List object + // Row is field value, colf is field name, colq is partitionid\0language, value is Uid.List + // object Mutation gm = new Mutation(index.getValue()); gm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val); context.write(indexTableName, gm); - + // Create mutations for the global reverse index Mutation grm = new Mutation(StringUtils.reverse(index.getValue())); - grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), val); + grm.put(index.getKey(), partitionId + NULL_BYTE + language, cv, article.getTimestamp(), + val); context.write(reverseIndexTableName, grm); - + // Create mutations for the metadata table. String metadataKey = index.getKey() + METADATA_INDEX_COLUMN_FAMILY + language; if (!metadataSent.contains(metadataKey)) { Mutation mm = new Mutation(index.getKey()); - mm.put(METADATA_INDEX_COLUMN_FAMILY, language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, article.getTimestamp(), NULL_VALUE); + mm.put(METADATA_INDEX_COLUMN_FAMILY, + language + NULL_BYTE + LcNoDiacriticsNormalizer.class.getName(), cv, + article.getTimestamp(), NULL_VALUE); context.write(metadataTableName, mm); metadataSent.add(metadataKey); } } // Add the entire text to the document section of the table. 
- // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded GZIP'd document - m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(), new Value(Base64.encodeBase64(article.getText().getBytes()))); + // row is the partition, colf is 'd', colq is language\0articleid, value is Base64 encoded + // GZIP'd document + m.put(DOCUMENT_COLUMN_FAMILY, colfPrefix + article.getId(), cv, article.getTimestamp(), + new Value(Base64.encodeBase64(article.getText().getBytes()))); context.write(tablename, m); - + } else { context.getCounter("wikipedia", "invalid articles").increment(1); } context.progress(); } - + /** * Tokenize the wikipedia content - * - * @param article - * @return - * @throws IOException */ static Set getTokens(Article article) throws IOException { - Set tokenList = new HashSet(); + Set tokenList = new HashSet<>(); WikipediaTokenizer tok = new WikipediaTokenizer(new StringReader(article.getText())); TermAttribute term = tok.addAttribute(TermAttribute.class); try { while (tok.incrementToken()) { String token = term.term(); - if (!StringUtils.isEmpty(token)) + if (!StringUtils.isEmpty(token)) { tokenList.add(token); + } } } catch (IOException e) { log.error("Error tokenizing text", e); @@ -241,5 +251,5 @@ public class WikipediaMapper extends Mapper { } return tokenList; } - + } diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java index 841f169..e9248a8 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaPartitionedIngester.java @@ -30,12 +30,12 @@ import java.util.regex.Pattern; import org.apache.accumulo.core.client.AccumuloException; import org.apache.accumulo.core.client.AccumuloSecurityException; import org.apache.accumulo.core.client.ClientConfiguration; +import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty; import org.apache.accumulo.core.client.Connector; import org.apache.accumulo.core.client.IteratorSetting; import org.apache.accumulo.core.client.IteratorSetting.Column; import org.apache.accumulo.core.client.TableExistsException; import org.apache.accumulo.core.client.TableNotFoundException; -import org.apache.accumulo.core.client.ClientConfiguration.ClientProperty; import org.apache.accumulo.core.client.admin.TableOperations; import org.apache.accumulo.core.client.mapreduce.AccumuloOutputFormat; import org.apache.accumulo.core.client.security.tokens.PasswordToken; @@ -70,62 +70,66 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { public final static String INGEST_LANGUAGE = "wikipedia.ingest_language"; public final static String SPLIT_FILE = "wikipedia.split_file"; public final static String TABLE_NAME = "wikipedia.table"; - + public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new WikipediaPartitionedIngester(), args); System.exit(res); } - - private void createTables(TableOperations tops, String tableName) throws AccumuloException, AccumuloSecurityException, TableNotFoundException, - TableExistsException { + + private void createTables(TableOperations tops, String tableName) throws AccumuloException, + AccumuloSecurityException, TableNotFoundException, TableExistsException { // Create the shard table 
String indexTableName = tableName + "Index"; String reverseIndexTableName = tableName + "ReverseIndex"; String metadataTableName = tableName + "Metadata"; - + // create the shard table if (!tops.exists(tableName)) { - // Set a text index combiner on the given field names. No combiner is set if the option is not supplied + // Set a text index combiner on the given field names. No combiner is set if the option is not + // supplied String textIndexFamilies = WikipediaMapper.TOKENS_FIELD_NAME; - + tops.create(tableName); if (textIndexFamilies.length() > 0) { System.out.println("Adding content combiner on the fields: " + textIndexFamilies); - + IteratorSetting setting = new IteratorSetting(10, TextIndexCombiner.class); - List columns = new ArrayList(); + List columns = new ArrayList<>(); for (String family : StringUtils.split(textIndexFamilies, ',')) { columns.add(new Column("fi\0" + family)); } TextIndexCombiner.setColumns(setting, columns); TextIndexCombiner.setLossyness(setting, true); - + tops.attachIterator(tableName, setting, EnumSet.allOf(IteratorScope.class)); } - + // Set the locality group for the full content column family - tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY)))); - + tops.setLocalityGroups(tableName, Collections.singletonMap("WikipediaDocuments", + Collections.singleton(new Text(WikipediaMapper.DOCUMENT_COLUMN_FAMILY)))); + } - + if (!tops.exists(indexTableName)) { tops.create(indexTableName); // Add the UID combiner - IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); + IteratorSetting setting = + new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(indexTableName, setting, EnumSet.allOf(IteratorScope.class)); } - + if (!tops.exists(reverseIndexTableName)) { tops.create(reverseIndexTableName); // Add the UID combiner - IteratorSetting setting = new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); + IteratorSetting setting = + new IteratorSetting(19, "UIDAggregator", GlobalIndexUidCombiner.class); GlobalIndexUidCombiner.setCombineAllColumns(setting, true); GlobalIndexUidCombiner.setLossyness(setting, true); tops.attachIterator(reverseIndexTableName, setting, EnumSet.allOf(IteratorScope.class)); } - + if (!tops.exists(metadataTableName)) { // Add the SummingCombiner with VARLEN encoding for the frequency column tops.create(metadataTableName); @@ -135,51 +139,51 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { tops.attachIterator(metadataTableName, setting, EnumSet.allOf(IteratorScope.class)); } } - + @Override public int run(String[] args) throws Exception { Configuration conf = getConf(); - if(WikipediaConfiguration.runPartitioner(conf)) - { + if (WikipediaConfiguration.runPartitioner(conf)) { int result = runPartitionerJob(); - if(result != 0) + if (result != 0) { return result; + } } - if(WikipediaConfiguration.runIngest(conf)) - { + if (WikipediaConfiguration.runIngest(conf)) { int result = runIngestJob(); - if(result != 0) + if (result != 0) { return result; - if(WikipediaConfiguration.bulkIngest(conf)) + } + if (WikipediaConfiguration.bulkIngest(conf)) { return loadBulkFiles(); + } } return 0; } - - private int runPartitionerJob() throws Exception - { + + private int runPartitionerJob() throws Exception { Job 
partitionerJob = new Job(getConf(), "Partition Wikipedia"); Configuration partitionerConf = partitionerJob.getConfiguration(); partitionerConf.set("mapred.map.tasks.speculative.execution", "false"); configurePartitionerJob(partitionerJob); - - List inputPaths = new ArrayList(); - SortedSet languages = new TreeSet(); + + List inputPaths = new ArrayList<>(); + SortedSet languages = new TreeSet<>(); FileSystem fs = FileSystem.get(partitionerConf); Path parent = new Path(partitionerConf.get("wikipedia.input")); listFiles(parent, fs, inputPaths, languages); - + System.out.println("Input files in " + parent + ":" + inputPaths.size()); Path[] inputPathsArray = new Path[inputPaths.size()]; inputPaths.toArray(inputPathsArray); - + System.out.println("Languages:" + languages.size()); // setup input format - + WikipediaInputFormat.setInputPaths(partitionerJob, inputPathsArray); - + partitionerJob.setMapperClass(WikipediaPartitioner.class); partitionerJob.setNumReduceTasks(0); @@ -193,95 +197,94 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { SequenceFileOutputFormat.setOutputPath(partitionerJob, outputDir); SequenceFileOutputFormat.setCompressOutput(partitionerJob, true); SequenceFileOutputFormat.setOutputCompressionType(partitionerJob, CompressionType.RECORD); - + return partitionerJob.waitForCompletion(true) ? 0 : 1; } - - private int runIngestJob() throws Exception - { - Job ingestJob = new Job(getConf(), "Ingest Partitioned Wikipedia"); + + private int runIngestJob() throws Exception { + Job ingestJob = Job.getInstance(getConf(), "Ingest Partitioned Wikipedia"); Configuration ingestConf = ingestJob.getConfiguration(); ingestConf.set("mapred.map.tasks.speculative.execution", "false"); configureIngestJob(ingestJob); - + String tablename = WikipediaConfiguration.getTableName(ingestConf); - + Connector connector = WikipediaConfiguration.getConnector(ingestConf); - + TableOperations tops = connector.tableOperations(); - + createTables(tops, tablename); - + ingestJob.setMapperClass(WikipediaPartitionedMapper.class); ingestJob.setNumReduceTasks(0); - + // setup input format ingestJob.setInputFormatClass(SequenceFileInputFormat.class); - SequenceFileInputFormat.setInputPaths(ingestJob, WikipediaConfiguration.getPartitionedArticlesPath(ingestConf)); + SequenceFileInputFormat.setInputPaths(ingestJob, + WikipediaConfiguration.getPartitionedArticlesPath(ingestConf)); // TODO make split size configurable - SequenceFileInputFormat.setMinInputSplitSize(ingestJob, WikipediaConfiguration.getMinInputSplitSize(ingestConf)); + SequenceFileInputFormat.setMinInputSplitSize(ingestJob, + WikipediaConfiguration.getMinInputSplitSize(ingestConf)); // setup output format ingestJob.setMapOutputKeyClass(Text.class); ingestJob.setMapOutputValueClass(Mutation.class); - - if(WikipediaConfiguration.bulkIngest(ingestConf)) - { + + if (WikipediaConfiguration.bulkIngest(ingestConf)) { ingestJob.setOutputFormatClass(SortingRFileOutputFormat.class); - SortingRFileOutputFormat.setMaxBufferSize(ingestConf, WikipediaConfiguration.bulkIngestBufferSize(ingestConf)); + SortingRFileOutputFormat.setMaxBufferSize(ingestConf, + WikipediaConfiguration.bulkIngestBufferSize(ingestConf)); String bulkIngestDir = WikipediaConfiguration.bulkIngestDir(ingestConf); - if(bulkIngestDir == null) - { + if (bulkIngestDir == null) { log.error("Bulk ingest dir not set"); return 1; } - SortingRFileOutputFormat.setPathName(ingestConf, WikipediaConfiguration.bulkIngestDir(ingestConf)); + 
SortingRFileOutputFormat.setPathName(ingestConf, + WikipediaConfiguration.bulkIngestDir(ingestConf)); } else { ingestJob.setOutputFormatClass(AccumuloOutputFormat.class); - ClientConfiguration clientConfig = new ClientConfiguration(); - clientConfig.setProperty(ClientProperty.INSTANCE_NAME, WikipediaConfiguration.getInstanceName(ingestConf)); - clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, WikipediaConfiguration.getZookeepers(ingestConf)); + ClientConfiguration clientConfig = ClientConfiguration.create(); + clientConfig.setProperty(ClientProperty.INSTANCE_NAME, + WikipediaConfiguration.getInstanceName(ingestConf)); + clientConfig.setProperty(ClientProperty.INSTANCE_ZK_HOST, + WikipediaConfiguration.getZookeepers(ingestConf)); String user = WikipediaConfiguration.getUser(ingestConf); byte[] password = WikipediaConfiguration.getPassword(ingestConf); AccumuloOutputFormat.setConnectorInfo(ingestJob, user, new PasswordToken(password)); AccumuloOutputFormat.setZooKeeperInstance(ingestJob, clientConfig); } - + return ingestJob.waitForCompletion(true) ? 0 : 1; } - - private int loadBulkFiles() throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException - { + + private int loadBulkFiles() + throws IOException, AccumuloException, AccumuloSecurityException, TableNotFoundException { Configuration conf = getConf(); Connector connector = WikipediaConfiguration.getConnector(conf); - + FileSystem fs = FileSystem.get(conf); String directory = WikipediaConfiguration.bulkIngestDir(conf); - + String failureDirectory = WikipediaConfiguration.bulkIngestFailureDir(conf); - - for(FileStatus status: fs.listStatus(new Path(directory))) - { - if(status.isDir() == false) + + for (FileStatus status : fs.listStatus(new Path(directory))) { + if (status.isDir() == false) { continue; + } Path dir = status.getPath(); - Path failPath = new Path(failureDirectory+"/"+dir.getName()); + Path failPath = new Path(failureDirectory + "/" + dir.getName()); fs.mkdirs(failPath); - connector.tableOperations().importDirectory(dir.getName(), dir.toString(), failPath.toString(), true); + connector.tableOperations().importDirectory(dir.getName(), dir.toString(), + failPath.toString(), true); } - + return 0; } - - public final static PathFilter partFilter = new PathFilter() { - @Override - public boolean accept(Path path) { - return path.getName().startsWith("part"); - }; - }; - + + public final static PathFilter partFilter = path -> path.getName().startsWith("part"); + protected void configurePartitionerJob(Job job) { Configuration conf = job.getConfiguration(); job.setJarByClass(WikipediaPartitionedIngester.class); @@ -293,10 +296,11 @@ public class WikipediaPartitionedIngester extends Configured implements Tool { protected void configureIngestJob(Job job) { job.setJarByClass(WikipediaPartitionedIngester.class); } - + protected static final Pattern filePattern = Pattern.compile("([a-z_]+).*.xml(.bz2)?"); - - protected void listFiles(Path path, FileSystem fs, List files, Set languages) throws IOException { + + protected void listFiles(Path path, FileSystem fs, List files, Set languages) + throws IOException { for (FileStatus status : fs.listStatus(path)) { if (status.isDir()) { listFiles(status.getPath(), fs, files, languages); diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java index 09755c0..9eda61d 100644 --- 
a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/AggregatingRecordReader.java @@ -16,7 +16,6 @@ */ package org.apache.accumulo.examples.wikisearch.reader; - import java.io.IOException; import org.apache.accumulo.examples.wikisearch.ingest.WikipediaConfiguration; @@ -27,18 +26,17 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.TaskAttemptContext; - /** - * This class aggregates Text values based on a start and end filter. An example use case for this would be XML data. This will not work with data that has - * nested start and stop tokens. - * + * This class aggregates Text values based on a start and end filter. An example use case for this + * would be XML data. This will not work with data that has nested start and stop tokens. + * */ public class AggregatingRecordReader extends LongLineRecordReader { - + public static final String START_TOKEN = "aggregating.token.start"; public static final String END_TOKEN = "aggregating.token.end"; public static final String RETURN_PARTIAL_MATCHES = "aggregating.allow.partial"; - + private LongWritable key = new LongWritable(); private String startToken = null; private String endToken = null; @@ -47,35 +45,40 @@ public class AggregatingRecordReader extends LongLineRecordReader { private boolean startFound = false; private StringBuilder remainder = new StringBuilder(0); private boolean returnPartialMatches = false; - + @Override public LongWritable getCurrentKey() { key.set(counter); return key; } - + @Override public Text getCurrentValue() { return aggValue; } - + @Override public void initialize(InputSplit genericSplit, TaskAttemptContext context) throws IOException { - super.initialize(((WikipediaInputSplit)genericSplit).getFileSplit(), context); - this.startToken = WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class); - this.endToken = WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class); - this.returnPartialMatches = context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false); - + super.initialize(((WikipediaInputSplit) genericSplit).getFileSplit(), context); + this.startToken = + WikipediaConfiguration.isNull(context.getConfiguration(), START_TOKEN, String.class); + this.endToken = + WikipediaConfiguration.isNull(context.getConfiguration(), END_TOKEN, String.class); + this.returnPartialMatches = + context.getConfiguration().getBoolean(RETURN_PARTIAL_MATCHES, false); + /* - * Text-appending works almost exactly like the + operator on Strings- it creates a byte array exactly the size of [prefix + suffix] and dumps the bytes - * into the new array. This module works by doing lots of little additions, one line at a time. With most XML, the documents are partitioned on line - * boundaries, so we will generally have lots of additions. Setting a large default byte array for a text object can avoid this and give us - * StringBuilder-like functionality for Text objects. + * Text-appending works almost exactly like the + operator on Strings- it creates a byte array + * exactly the size of [prefix + suffix] and dumps the bytes into the new array. This module + * works by doing lots of little additions, one line at a time. With most XML, the documents are + * partitioned on line boundaries, so we will generally have lots of additions. 
Setting a large + * default byte array for a text object can avoid this and give us StringBuilder-like + * functionality for Text objects. */ byte[] txtBuffer = new byte[2048]; aggValue.set(txtBuffer); } - + @Override public boolean nextKeyValue() throws IOException { aggValue.clear(); @@ -83,10 +86,11 @@ public class AggregatingRecordReader extends LongLineRecordReader { boolean finished = false; // Find the start token while (!finished && (((hasNext = super.nextKeyValue()) == true) || remainder.length() > 0)) { - if (hasNext) + if (hasNext) { finished = process(super.getCurrentValue()); - else + } else { finished = process(null); + } if (finished) { startFound = false; counter++; @@ -104,24 +108,25 @@ public class AggregatingRecordReader extends LongLineRecordReader { } return false; } - + /** * Populates aggValue with the contents of the Text object. - * - * @param t + * * @return true if aggValue is complete, else false and needs more data. */ private boolean process(Text t) { - - if (null != t) + + if (null != t) { remainder.append(t.toString()); + } while (remainder.length() > 0) { if (!startFound) { // If found, then begin aggregating at the start offset int start = remainder.indexOf(startToken); if (-1 != start) { // Append the start token to the aggregate value - TextUtil.textAppendNoNull(aggValue, remainder.substring(start, start + startToken.length()), false); + TextUtil.textAppendNoNull(aggValue, + remainder.substring(start, start + startToken.length()), false); // Remove to the end of the start token from the remainder remainder.delete(0, start + startToken.length()); startFound = true; @@ -157,7 +162,8 @@ public class AggregatingRecordReader extends LongLineRecordReader { return true; } else { // END_TOKEN was found. Extract to the end of END_TOKEN - TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), false); + TextUtil.textAppendNoNull(aggValue, remainder.substring(0, end + endToken.length()), + false); // Remove from remainder up to the end of END_TOKEN remainder.delete(0, end + endToken.length()); return true; @@ -167,5 +173,5 @@ public class AggregatingRecordReader extends LongLineRecordReader { } return false; } - + } diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java index a4da0ad..29ace56 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/reader/LfLineReader.java @@ -34,77 +34,78 @@ public class LfLineReader { private int bufferLength = 0; // the current position in the buffer private int bufferPosn = 0; - + private static final byte LF = '\n'; - + /** * Create a line reader that reads from the given stream using the default buffer-size (64k). - * + * * @param in * The input stream - * @throws IOException */ public LfLineReader(InputStream in) { this(in, DEFAULT_BUFFER_SIZE); } - + /** * Create a line reader that reads from the given stream using the given buffer-size. - * + * * @param in * The input stream * @param bufferSize * Size of the read buffer - * @throws IOException */ public LfLineReader(InputStream in, int bufferSize) { this.in = in; this.bufferSize = bufferSize; this.buffer = new byte[this.bufferSize]; } - + /** - * Create a line reader that reads from the given stream using the io.file.buffer.size specified in the given Configuration. 
- * + * Create a line reader that reads from the given stream using the + * io.file.buffer.size specified in the given Configuration. + * * @param in * input stream * @param conf * configuration - * @throws IOException */ public LfLineReader(InputStream in, Configuration conf) throws IOException { this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); } - + /** * Close the underlying stream. - * - * @throws IOException */ public void close() throws IOException { in.close(); } - + /** - * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). EOF also terminates an otherwise unterminated line. - * + * Read one line from the InputStream into the given Text. A line can be terminated by '\n' (LF). + * EOF also terminates an otherwise unterminated line. + * * @param str * the object to store the given line (without newline) * @param maxLineLength - * the maximum number of bytes to store into str; the rest of the line is silently discarded. + * the maximum number of bytes to store into str; the rest of the line is silently + * discarded. * @param maxBytesToConsume - * the maximum number of bytes to consume in this call. This is only a hint, because if the line cross this threshold, we allow it to happen. It can - * overshoot potentially by as much as one buffer length. - * + * the maximum number of bytes to consume in this call. This is only a hint, because if + * the line cross this threshold, we allow it to happen. It can overshoot potentially by + * as much as one buffer length. + * * @return the number of bytes read including the (longest) newline found. - * + * * @throws IOException * if the underlying stream throws */ public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* - * We're reading data from in, but the head of the stream may be already buffered in buffer, so we have several cases: 1. No newline characters are in the - * buffer, so we need to copy everything and read another buffer from the stream. 2. An unambiguously terminated line is in buffer, so we just copy to str. + * We're reading data from in, but the head of the stream may be already buffered in buffer, so + * we have several cases: 1. No newline characters are in the buffer, so we need to copy + * everything and read another buffer from the stream. 2. An unambiguously terminated line is in + * buffer, so we just copy to str. */ str.clear(); int txtLength = 0; // tracks str.getLength(), as an optimization @@ -115,8 +116,9 @@ public class LfLineReader { if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; bufferLength = in.read(buffer); - if (bufferLength <= 0) + if (bufferLength <= 0) { break; // EOF + } } for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline if (buffer[bufferPosn] == LF) { @@ -136,15 +138,16 @@ public class LfLineReader { txtLength += appendLength; } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); - - if (bytesConsumed > Integer.MAX_VALUE) + + if (bytesConsumed > Integer.MAX_VALUE) { throw new IOException("Too many bytes before newline: " + bytesConsumed); + } return (int) bytesConsumed; } - + /** * Read from the InputStream into the given Text. - * + * * @param str * the object to store the given line * @param maxLineLength @@ -156,10 +159,10 @@ public class LfLineReader { public int readLine(Text str, int maxLineLength) throws IOException { return readLine(str, maxLineLength, Integer.MAX_VALUE); } - + /** * Read from the InputStream into the given Text. 
- * + * * @param str * the object to store the given line * @return the number of bytes read including the newline @@ -169,5 +172,5 @@ public class LfLineReader { public int readLine(Text str) throws IOException { return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); } - + } diff --git a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java index 1623d55..4471b10 100644 --- a/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java +++ b/ingest/src/main/java/org/apache/accumulo/examples/wikisearch/util/TextUtil.java @@ -23,10 +23,10 @@ import org.apache.accumulo.core.iterators.user.SummingCombiner; import org.apache.hadoop.io.Text; public class TextUtil { - + /** * Appends a null byte followed by the UTF-8 bytes of the given string to the given {@link Text} - * + * * @param text * the Text to which to append * @param string @@ -36,32 +36,32 @@ public class TextUtil { appendNullByte(text); textAppendNoNull(text, string); } - + public static void textAppend(Text text, String string, boolean replaceBadChar) { appendNullByte(text); textAppendNoNull(text, string, replaceBadChar); } - + public static void textAppend(Text t, long s) { t.append(nullByte, 0, 1); t.append(SummingCombiner.FIXED_LEN_ENCODER.encode(s), 0, 8); } - + private static final byte[] nullByte = {0}; - + /** * Appends a null byte to the given text - * + * * @param text * the text to which to append the null byte */ public static void appendNullByte(Text text) { text.append(nullByte, 0, nullByte.length); } - + /** * Appends the UTF-8 bytes of the given string to the given {@link Text} - * + * * @param t * the Text to which to append * @param s @@ -70,13 +70,9 @@ public class TextUtil { public static void textAppendNoNull(Text t, String s) { textAppendNoNull(t, s, false); } - + /** * Appends the UTF-8 bytes of the given string to the given {@link Text} - * - * @param t - * @param s - * @param replaceBadChar */ public static void textAppendNoNull(Text t, String s, boolean replaceBadChar) { try { @@ -86,11 +82,11 @@ public class TextUtil { throw new IllegalArgumentException(cce); } } - + /** - * Converts the given string its UTF-8 bytes. This uses Hadoop's method for converting string to UTF-8 and is much faster than calling - * {@link String#getBytes(String)}. - * + * Converts the given string its UTF-8 bytes. This uses Hadoop's method for converting string to + * UTF-8 and is much faster than calling {@link String#getBytes(String)}. 
+ * * @param string * the string to convert * @return the UTF-8 representation of the string diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java index f6b2791..777d7b6 100644 --- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java +++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/ingest/WikipediaInputSplitTest.java @@ -23,11 +23,10 @@ import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; -import junit.framework.Assert; - import org.apache.accumulo.examples.wikisearch.ingest.WikipediaInputFormat.WikipediaInputSplit; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.junit.Assert; import org.junit.Test; public class WikipediaInputSplitTest { @@ -44,22 +43,22 @@ public class WikipediaInputSplitTest { split.write(out); out.close(); baos.close(); - + ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); DataInput in = new ObjectInputStream(bais); - + WikipediaInputSplit split2 = new WikipediaInputSplit(); split2.readFields(in); Assert.assertTrue(bais.available() == 0); bais.close(); - + Assert.assertTrue(split.getPartition() == split2.getPartition()); - + FileSplit fSplit2 = split2.getFileSplit(); Assert.assertTrue(fSplit.getPath().equals(fSplit2.getPath())); Assert.assertTrue(fSplit.getStart() == fSplit2.getStart()); Assert.assertTrue(fSplit.getLength() == fSplit2.getLength()); - + String[] hosts2 = fSplit2.getLocations(); Assert.assertEquals(hosts.length, hosts2.length); for (int i = 0; i < hosts.length; i++) { diff --git a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java index 7297b5a..3c87f3f 100644 --- a/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java +++ b/ingest/src/test/java/org/apache/accumulo/examples/wikisearch/iterator/TextIndexTest.java @@ -20,13 +20,12 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import junit.framework.Assert; - import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight; import org.apache.accumulo.examples.wikisearch.protobuf.TermWeight.Info.Builder; import org.junit.After; +import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -35,72 +34,72 @@ import com.google.protobuf.InvalidProtocolBufferException; public class TextIndexTest { private TextIndexCombiner combiner; private List values; - + @Before public void setup() throws Exception { combiner = new TextIndexCombiner(); combiner.init(null, Collections.singletonMap("all", "true"), null); - values = new ArrayList(); + values = new ArrayList<>(); } - + @After public void cleanup() { - + } - + private TermWeight.Info.Builder createBuilder() { return TermWeight.Info.newBuilder(); } - + @Test public void testSingleValue() throws InvalidProtocolBufferException { Builder builder = createBuilder(); builder.addWordOffset(1); builder.addWordOffset(5); builder.setNormalizedTermFrequency(0.1f); - + values.add(new Value(builder.build().toByteArray())); - + Value result = combiner.reduce(new Key(), values.iterator()); - + TermWeight.Info info = 
TermWeight.Info.parseFrom(result.get()); - + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.1f); - + List offsets = info.getWordOffsetList(); Assert.assertTrue(offsets.size() == 2); Assert.assertTrue(offsets.get(0) == 1); Assert.assertTrue(offsets.get(1) == 5); } - + @Test public void testAggregateTwoValues() throws InvalidProtocolBufferException { Builder builder = createBuilder(); builder.addWordOffset(1); builder.addWordOffset(5); builder.setNormalizedTermFrequency(0.1f); - + values.add(new Value(builder.build().toByteArray())); - + builder = createBuilder(); builder.addWordOffset(3); builder.setNormalizedTermFrequency(0.05f); - + values.add(new Value(builder.build().toByteArray())); - + Value result = combiner.reduce(new Key(), values.iterator()); - + TermWeight.Info info = TermWeight.Info.parseFrom(result.get()); - + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.15f); - + List offsets = info.getWordOffsetList(); Assert.assertTrue(offsets.size() == 3); Assert.assertTrue(offsets.get(0) == 1); Assert.assertTrue(offsets.get(1) == 3); Assert.assertTrue(offsets.get(2) == 5); } - + @Test public void testAggregateManyValues() throws InvalidProtocolBufferException { Builder builder = createBuilder(); @@ -108,28 +107,28 @@ public class TextIndexTest { builder.addWordOffset(15); builder.addWordOffset(19); builder.setNormalizedTermFrequency(0.12f); - + values.add(new Value(builder.build().toByteArray())); - + builder = createBuilder(); builder.addWordOffset(1); builder.addWordOffset(5); builder.setNormalizedTermFrequency(0.1f); - + values.add(new Value(builder.build().toByteArray())); - + builder = createBuilder(); builder.addWordOffset(3); builder.setNormalizedTermFrequency(0.05f); - + values.add(new Value(builder.build().toByteArray())); - + Value result = combiner.reduce(new Key(), values.iterator()); - + TermWeight.Info info = TermWeight.Info.parseFrom(result.get()); - + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f); - + List offsets = info.getWordOffsetList(); Assert.assertTrue(offsets.size() == 6); Assert.assertTrue(offsets.get(0) == 1); @@ -139,7 +138,7 @@ public class TextIndexTest { Assert.assertTrue(offsets.get(4) == 15); Assert.assertTrue(offsets.get(5) == 19); } - + @Test public void testEmptyValue() throws InvalidProtocolBufferException { Builder builder = createBuilder(); @@ -147,32 +146,32 @@ public class TextIndexTest { builder.addWordOffset(15); builder.addWordOffset(19); builder.setNormalizedTermFrequency(0.12f); - + values.add(new Value("".getBytes())); values.add(new Value(builder.build().toByteArray())); values.add(new Value("".getBytes())); - + builder = createBuilder(); builder.addWordOffset(1); builder.addWordOffset(5); builder.setNormalizedTermFrequency(0.1f); - + values.add(new Value(builder.build().toByteArray())); values.add(new Value("".getBytes())); - + builder = createBuilder(); builder.addWordOffset(3); builder.setNormalizedTermFrequency(0.05f); - + values.add(new Value(builder.build().toByteArray())); values.add(new Value("".getBytes())); - + Value result = combiner.reduce(new Key(), values.iterator()); - + TermWeight.Info info = TermWeight.Info.parseFrom(result.get()); - + Assert.assertTrue(info.getNormalizedTermFrequency() == 0.27f); - + List offsets = info.getWordOffsetList(); Assert.assertTrue(offsets.size() == 6); Assert.assertTrue(offsets.get(0) == 1); diff --git a/pom.xml b/pom.xml index d62928a..fdaba5e 100644 --- a/pom.xml +++ b/pom.xml @@ -19,14 +19,14 @@ 4.0.0 - org.apache - apache - 18 + org.apache + apache + 21 
org.apache.accumulo accumulo-wikisearch - 1.8.0 + 2.0.0-SNAPSHOT pom accumulo-wikisearch @@ -37,15 +37,18 @@ UTF-8 - ${project.version} + 1.8 + 1.8 + 2.0.0-alpha-2 3.2.1 1.5 + 1.10 2.0.1 2.4 1.0.1.Final - 11.0.2 - 2.6.4 - 3.1.0-incubating + 27.0-jre + 3.1.1 + 3.2.0-incubating 2.1.0.GA 4.11 1.04 @@ -56,50 +59,71 @@ 3.0.2 1.2 2.5.0 - 0.9.3 - 3.3.1 + 0.12.0 + 3.4.14 - - org.apache.accumulo - accumulo-fate - ${version.accumulo} - - - org.apache.hadoop - hadoop-client - ${version.hadoop} - - - commons-collections - commons-collections - ${version.collections} - - - commons-logging - commons-logging - 1.1.1 - - - org.apache.commons - commons-math3 - 3.6.1 - - - junit - junit - ${version.junit} - - + + javaee + javaee-api + 5 + + + org.apache.hadoop + hadoop-client + ${version.hadoop} + + + org.apache.kerby + kerb-simplekdc + + + + + org.apache.htrace + htrace-core + ${version.htrace} + + + com.google.code.findbugs + jsr305 + 3.0.2 + + + commons-collections + commons-collections + ${version.collections} + + + commons-io + commons-io + 2.5 + + + commons-logging + commons-logging + 1.1.1 + + + org.apache.commons + commons-math3 + 3.6.1 + + + junit + junit + ${version.junit} + + org.apache.zookeeper zookeeper ${version.zookeeper} - - jline - jline - + + jline + jline + @@ -149,6 +173,11 @@ ${version.commons-codec} + commons-configuration + commons-configuration + ${version.commons-configuration} + + commons-lang commons-lang ${version.commons-lang} @@ -176,6 +205,11 @@ ${version.accumulo} + org.apache.accumulo + wikisearch-ingest + ${project.version} + + org.apache.commons commons-jexl ${version.commons-jexl} @@ -210,11 +244,11 @@ libthrift ${version.thrift} - - - org.apache.httpcomponents - httpcore - + + + org.apache.httpcomponents + httpcore + @@ -232,6 +266,21 @@ jackson-mapper-asl 1.9.13 + + com.fasterxml.jackson.core + jackson-annotations + 2.7.8 + + + com.google.code.gson + gson + 2.8.5 + + + commons-beanutils + commons-beanutils + 1.9.3 + @@ -239,15 +288,6 @@ false - central - Maven Repository Switchboard - http://repo1.maven.org/maven2 - default - - - - false - java.net java.net https://maven.java.net/content/groups/public @@ -255,21 +295,19 @@ - package + + + + org.apache.maven.plugins + maven-ejb-plugin + 2.1 + + + org.apache.maven.plugins - maven-compiler-plugin - 3.1 - - 1.7 - 1.7 - - - - org.apache.maven.plugins maven-enforcer-plugin - 1.3.1 enforce-mvn @@ -287,7 +325,6 @@ org.apache.maven.plugins maven-clean-plugin - 2.5 @@ -302,7 +339,6 @@ org.apache.maven.plugins maven-jar-plugin - 2.4 lib @@ -321,42 +357,25 @@ org.apache.maven.plugins - maven-resources-plugin - 2.6 - - UTF-8 - - - - org.apache.maven.plugins maven-javadoc-plugin - 2.9.1 - UTF-8 true lib docs - 1.6 + 1.8 -J-Xmx512m org.apache.maven.plugins maven-source-plugin - 2.2.1 lib org.apache.maven.plugins - maven-surefire-plugin - 2.17 - - - org.apache.maven.plugins maven-dependency-plugin - 2.8 false @@ -367,7 +386,7 @@ prepare-package ../../lib - + commons-collections,commons-configuration,commons-io,commons-lang,jline,log4j,libthrift,commons-jci-core,commons-jci-fam,commons-logging,commons-logging-api accumulo true diff --git a/query-war/pom.xml b/query-war/pom.xml index 45a1f1b..c1404fd 100644 --- a/query-war/pom.xml +++ b/query-war/pom.xml @@ -20,7 +20,7 @@ org.apache.accumulo accumulo-wikisearch - 1.8.0 + 2.0.0-SNAPSHOT wikisearch-query-war @@ -30,17 +30,7 @@ org.apache.maven.plugins - maven-compiler-plugin - 3.1 - - 1.6 - 1.6 - - - - org.apache.maven.plugins maven-war-plugin - 2.1.1 true diff --git a/query/pom.xml 
b/query/pom.xml index e022965..48b2477 100644 --- a/query/pom.xml +++ b/query/pom.xml @@ -20,7 +20,7 @@ org.apache.accumulo accumulo-wikisearch - 1.8.0 + 2.0.0-SNAPSHOT wikisearch-query ejb @@ -53,7 +53,6 @@ commons-configuration commons-configuration - 1.6 commons-lang @@ -66,7 +65,6 @@ org.apache.accumulo wikisearch-ingest - ${project.version} org.apache.commons @@ -79,7 +77,6 @@ javaee javaee-api - 5 provided @@ -90,23 +87,11 @@ commons-io commons-io - 2.4 - runtime - - - org.apache.accumulo - accumulo-fate - runtime - - - org.apache.accumulo - accumulo-trace runtime org.apache.htrace htrace-core - ${version.htrace} runtime @@ -155,27 +140,15 @@ org.apache.maven.plugins maven-assembly-plugin - 2.4 src/assembly/dist.xml - gnu - - - - org.apache.maven.plugins - maven-compiler-plugin - 3.1 - - 1.6 - 1.6 org.apache.maven.plugins maven-ejb-plugin - 2.1 3.1 diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java index 87b4da2..73319fd 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AbstractEvaluatingIterator.java @@ -38,33 +38,39 @@ import org.apache.accumulo.examples.wikisearch.parser.QueryEvaluator; import org.apache.commons.jexl2.parser.ParseException; import org.apache.log4j.Logger; - import com.esotericsoftware.kryo.Kryo; /** - * - * This iterator aggregates rows together using the specified key comparator. Subclasses will provide their own implementation of fillMap which will fill the - * supplied EventFields object with field names (key) and field values (value). After all fields have been put into the aggregated object (by aggregating all - * columns with the same key), the EventFields object will be compared against the supplied expression. If the expression returns true, then the return key and - * return value can be retrieved via getTopKey() and getTopValue(). - * - * Optionally, the caller can set an expression (field operator value) that should not be evaluated against the event. For example, if the query is - * "A == 'foo' and B == 'bar'", but for some reason B may not be in the data, then setting the UNEVALUATED_EXPRESSIONS option to "B == 'bar'" will allow the - * events to be evaluated against the remainder of the expression and still return as true. - * - * By default this iterator will return all Events in the shard. If the START_DATE and END_DATE are specified, then this iterator will evaluate the timestamp of - * the key against the start and end dates. If the event date is not within the range of start to end, then it is skipped. - * - * This iterator will return up the stack an EventFields object serialized using Kryo in the cell Value. - * + * + * This iterator aggregates rows together using the specified key comparator. Subclasses will + * provide their own implementation of fillMap which will fill the supplied EventFields object with + * field names (key) and field values (value). After all fields have been put into the aggregated + * object (by aggregating all columns with the same key), the EventFields object will be compared + * against the supplied expression. If the expression returns true, then the return key and return + * value can be retrieved via getTopKey() and getTopValue(). 
+ * + * Optionally, the caller can set an expression (field operator value) that should not be evaluated + * against the event. For example, if the query is "A == 'foo' and B == 'bar'", but for some reason + * B may not be in the data, then setting the UNEVALUATED_EXPRESSIONS option to "B == 'bar'" will + * allow the events to be evaluated against the remainder of the expression and still return as + * true. + * + * By default this iterator will return all Events in the shard. If the START_DATE and END_DATE are + * specified, then this iterator will evaluate the timestamp of the key against the start and end + * dates. If the event date is not within the range of start to end, then it is skipped. + * + * This iterator will return up the stack an EventFields object serialized using Kryo in the cell + * Value. + * */ -public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterator, OptionDescriber { - +public abstract class AbstractEvaluatingIterator + implements SortedKeyValueIterator, OptionDescriber { + private static Logger log = Logger.getLogger(AbstractEvaluatingIterator.class); protected static final byte[] NULL_BYTE = new byte[0]; public static final String QUERY_OPTION = "expr"; public static final String UNEVALUTED_EXPRESSIONS = "unevaluated.expressions"; - + private PartialKey comparator = null; protected SortedKeyValueIterator iterator; private Key currentKey = new Key(); @@ -76,34 +82,35 @@ public abstract class AbstractEvaluatingIterat private static Kryo kryo = new Kryo(); private Range seekRange = null; private Set skipExpressions = null; - + protected AbstractEvaluatingIterator(AbstractEvaluatingIterator other, IteratorEnvironment env) { iterator = other.iterator.deepCopy(env); event = other.event; } - + public AbstractEvaluatingIterator() {} - + /** - * Implementations will return the PartialKey value to use for comparing keys for aggregating events - * + * Implementations will return the PartialKey value to use for comparing keys for aggregating + * events + * * @return the type of comparator to use */ public abstract PartialKey getKeyComparator(); - + /** - * When the query expression evaluates to true against the event, the event fields will be serialized into the Value and returned up the iterator stack. - * Implemenations will need to provide a key to be used with the event. - * - * @param k + * When the query expression evaluates to true against the event, the event fields will be + * serialized into the Value and returned up the iterator stack. Implementations will need to + * provide a key to be used with the event. + * * @return the key that should be returned with the map of values. */ public abstract Key getReturnKey(Key k) throws Exception; - + /** - * Implementations will need to fill the map with field visibilities, names, and values. When all fields have been aggregated the event will be evaluated - * against the query expression. - * + * Implementations will need to fill the map with field visibilities, names, and values. When all + * fields have been aggregated the event will be evaluated against the query expression. + * * @param event * Multimap of event names and fields. * @param key @@ -112,44 +119,43 @@ public abstract class AbstractEvaluatingIterat * current Value */ public abstract void fillMap(EventFields event, Key key, Value value) throws Exception; - + /** - * Provides the ability to skip this key and all of the following ones that match using the comparator.
- * - * @param key + * Provides the ability to skip this key and all of the following ones that match using the + * comparator. + * * @return true if the key should be acted upon, otherwise false. - * @throws IOException */ public abstract boolean isKeyAccepted(Key key) throws IOException; - + /** * Reset state. */ public void reset() { event.clear(); } - + private void aggregateRowColumn(EventFields event) throws IOException { - + currentKey.set(iterator.getTopKey()); - + try { fillMap(event, iterator.getTopKey(), iterator.getTopValue()); iterator.next(); - + while (iterator.hasTop() && iterator.getTopKey().equals(currentKey, this.comparator)) { fillMap(event, iterator.getTopKey(), iterator.getTopValue()); iterator.next(); } - + // Get the return key returnKey = getReturnKey(currentKey); } catch (Exception e) { throw new IOException("Error aggregating event", e); } - + } - + private void findTop() throws IOException { do { reset(); @@ -161,10 +167,10 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat while (iterator.hasTop() && !isKeyAccepted(iterator.getTopKey())) { iterator.next(); } - + if (iterator.hasTop()) { aggregateRowColumn(event); - + // Evaluate the event against the expression if (event.size() > 0 && this.evaluator.evaluate(event)) { if (log.isDebugEnabled()) { @@ -191,33 +197,38 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat log.debug("Iterator.hasTop() == false"); } } while (returnValue == null && iterator.hasTop()); - + // Sanity check. Make sure both returnValue and returnKey are null or both are not null - if (!((returnKey == null && returnValue == null) || (returnKey != null && returnValue != null))) { + if (!((returnKey == null && returnValue == null) + || (returnKey != null && returnValue != null))) { log.warn("Key: " + ((returnKey == null) ? "null" : returnKey.toString())); log.warn("Value: " + ((returnValue == null) ? "null" : returnValue.toString())); throw new IOException("Return values are inconsistent"); } } - + + @Override public Key getTopKey() { if (returnKey != null) { return returnKey; } return iterator.getTopKey(); } - + + @Override public Value getTopValue() { if (returnValue != null) { return returnValue; } return iterator.getTopValue(); } - + + @Override public boolean hasTop() { return returnKey != null || iterator.hasTop(); } - + + @Override public void next() throws IOException { if (returnKey != null) { returnKey = null; @@ -225,53 +236,55 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat } else if (iterator.hasTop()) { iterator.next(); } - + findTop(); } - + /** * Copy of IteratorUtil.maximizeStartKeyTimeStamp due to IllegalAccessError - * - * @param range - * @return */ static Range maximizeStartKeyTimeStamp(Range range) { Range seekRange = range; - + if (range.getStartKey() != null && range.getStartKey().getTimestamp() != Long.MAX_VALUE) { Key seekKey = new Key(seekRange.getStartKey()); seekKey.setTimestamp(Long.MAX_VALUE); seekRange = new Range(seekKey, true, range.getEndKey(), range.isEndKeyInclusive()); } - + return seekRange; } - - public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) + throws IOException { // do not want to seek to the middle of a value that should be // aggregated... 
- + seekRange = maximizeStartKeyTimeStamp(range); - + iterator.seek(seekRange, columnFamilies, inclusive); findTop(); - + if (range.getStartKey() != null) { - while (hasTop() && getTopKey().equals(range.getStartKey(), this.comparator) && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) { + while (hasTop() && getTopKey().equals(range.getStartKey(), this.comparator) + && getTopKey().getTimestamp() > range.getStartKey().getTimestamp()) { // the value has a more recent time stamp, so // pass it up // log.debug("skipping "+getTopKey()); next(); } - + while (hasTop() && range.beforeStartKey(getTopKey())) { next(); } } - + } - - public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + + @Override + public void init(SortedKeyValueIterator source, Map options, + IteratorEnvironment env) throws IOException { validateOptions(options); event = new EventFields(); this.comparator = getKeyComparator(); @@ -292,31 +305,36 @@ public abstract class AbstractEvaluatingIterator implements SortedKeyValueIterat } EventFields.initializeKryo(kryo); } - + + @Override public IteratorOptions describeOptions() { - Map options = new HashMap(); + Map options = new HashMap<>(); options.put(QUERY_OPTION, "query expression"); options.put(UNEVALUTED_EXPRESSIONS, "comma separated list of expressions to skip"); - return new IteratorOptions(getClass().getSimpleName(), "evaluates event objects against an expression", options, null); + return new IteratorOptions(getClass().getSimpleName(), + "evaluates event objects against an expression", options, null); } - + + @Override public boolean validateOptions(Map options) { - if (!options.containsKey(QUERY_OPTION)) + if (!options.containsKey(QUERY_OPTION)) { return false; - else + } else { this.expression = options.get(QUERY_OPTION); - + } + if (options.containsKey(UNEVALUTED_EXPRESSIONS)) { String expressionList = options.get(UNEVALUTED_EXPRESSIONS); if (expressionList != null && !expressionList.trim().equals("")) { - this.skipExpressions = new HashSet(); - for (String e : expressionList.split(",")) + this.skipExpressions = new HashSet<>(); + for (String e : expressionList.split(",")) { this.skipExpressions.add(e); + } } } return true; } - + public String getQueryExpression() { return this.expression; } diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java index 47a55e1..3b03823 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/AndIterator.java @@ -35,7 +35,7 @@ import org.apache.hadoop.io.Text; import org.apache.log4j.Logger; public class AndIterator implements SortedKeyValueIterator { - + protected static final Logger log = Logger.getLogger(AndIterator.class); private TermSource[] sources; private int sourcesCount = 0; @@ -49,27 +49,28 @@ public class AndIterator implements SortedKeyValueIterator { private Text currentDocID = new Text(emptyByteArray); private static boolean SEEK_INCLUSIVE = true; private Text parentEndRow; - + /** * Used in representing a Term that is intersected on. 
*/ protected static class TermSource { - + public SortedKeyValueIterator iter; public Text dataLocation; public Text term; public boolean notFlag; private Collection seekColumnFamilies; - + private TermSource(TermSource other) { this(other.iter, other.dataLocation, other.term, other.notFlag); } - + public TermSource(SortedKeyValueIterator iter, Text dataLocation, Text term) { this(iter, dataLocation, term, false); } - - public TermSource(SortedKeyValueIterator iter, Text dataLocation, Text term, boolean notFlag) { + + public TermSource(SortedKeyValueIterator iter, Text dataLocation, Text term, + boolean notFlag) { this.iter = iter; this.dataLocation = dataLocation; ByteSequence bs = new ArrayByteSequence(dataLocation.getBytes(), 0, dataLocation.getLength()); @@ -77,75 +78,71 @@ public class AndIterator implements SortedKeyValueIterator { this.term = term; this.notFlag = notFlag; } - + public String getTermString() { return (this.term == null) ? new String("Iterator") : this.term.toString(); } } - + /* - * | Row | Column Family | Column Qualifier | Value - * | {RowID} | {dataLocation} | {term}\0{dataType}\0{UID} | Empty + * | Row | Column Family | Column Qualifier | Value | {RowID} | {dataLocation} | + * {term}\0{dataType}\0{UID} | Empty */ protected Text getPartition(Key key) { return key.getRow(); } - + /** * Returns the given key's dataLocation - * - * @param key + * * @return The given key's dataLocation */ protected Text getDataLocation(Key key) { return key.getColumnFamily(); } - + /** * Returns the given key's term - * - * @param key + * * @return The given key's term */ protected Text getTerm(Key key) { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return new Text(sKey.substring(0, idx)); } - + /** * Returns the given key's DocID - * - * @param key + * * @return The given key's DocID */ protected Text getDocID(Key key) { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return new Text(sKey.substring(idx + 1)); } - + /** * Returns the given key's UID - * - * @param key + * * @return The given key's UID */ protected String getUID(Key key) { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return sKey.substring(idx + 1); } - + /** * Build a key from the given row and dataLocation - * + * * @param row * The desired row * @param dataLocation @@ -155,10 +152,10 @@ public class AndIterator implements SortedKeyValueIterator { protected Key buildKey(Text row, Text dataLocation) { return new Key(row, (dataLocation == null) ? nullText : dataLocation); } - + /** * Build a key from the given row, dataLocation, and term - * + * * @param row * The desired row * @param dataLocation @@ -168,12 +165,13 @@ public class AndIterator implements SortedKeyValueIterator { * @return A Key object built from the given row, dataLocation, and term. */ protected Key buildKey(Text row, Text dataLocation, Text term) { - return new Key(row, (dataLocation == null) ? nullText : dataLocation, (term == null) ? nullText : term); + return new Key(row, (dataLocation == null) ? nullText : dataLocation, + (term == null) ? nullText : term); } - + /** * Return the key that directly follows the given key - * + * * @param key * The key who will be directly before the returned key * @return The key directly following the given key. 
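For reference while reading this patch: the accessors above all decode the index key layout documented at the top of AndIterator, where the row is the partition, the column family is the dataLocation, and the column qualifier is {term}\0{dataType}\0{UID}. A minimal illustrative sketch of that decomposition (not from this commit; the partition, dataLocation, term, and UID values below are invented) matching what getTerm and getDocID compute:

import org.apache.accumulo.core.data.Key;
import org.apache.hadoop.io.Text;

public class KeyLayoutSketch {
  public static void main(String[] args) {
    // Hypothetical wikisearch index key following the layout described above.
    Key k = new Key(new Text("partition_00"),      // row: the shard/partition id
        new Text("fi\0TEXT"),                      // column family: the dataLocation
        new Text("apache\0enwiki\0uid-123"));      // column qualifier: term\0dataType\0UID
    String cq = k.getColumnQualifier().toString();
    int idx = cq.indexOf('\0');
    String term = cq.substring(0, idx);            // "apache", what getTerm(k) yields
    String docId = cq.substring(idx + 1);          // "enwiki\0uid-123", what getDocID(k) yields
    System.out.println(term + " -> " + docId.replace('\0', '/'));
  }
}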
@@ -181,95 +179,102 @@ public class AndIterator implements SortedKeyValueIterator { protected Key buildFollowingPartitionKey(Key key) { return key.followingKey(PartialKey.ROW); } - + /** * Empty default constructor */ public AndIterator() {} - + + @Override public SortedKeyValueIterator deepCopy(IteratorEnvironment env) { return new AndIterator(this, env); } - + public AndIterator(AndIterator other, IteratorEnvironment env) { if (other.sources != null) { sourcesCount = other.sourcesCount; sources = new TermSource[sourcesCount]; for (int i = 0; i < sourcesCount; i++) { - sources[i] = new TermSource(other.sources[i].iter.deepCopy(env), other.sources[i].dataLocation, other.sources[i].term); + sources[i] = new TermSource(other.sources[i].iter.deepCopy(env), + other.sources[i].dataLocation, other.sources[i].term); } } } - + + @Override public Key getTopKey() { return topKey; } - + + @Override public Value getTopValue() { return value; } - + + @Override public boolean hasTop() { return currentRow != null; } - + /** - * Find the next key in the current TermSource that is at or beyond the cursor (currentRow, currentTerm, currentDocID). - * - * @param sourceID - * The index of the current source in sources + * Find the next key in the current TermSource that is at or beyond the cursor (currentRow, + * currentTerm, currentDocID). + * + * The index of the current source in sources + * * @return True if the source advanced beyond the cursor - * @throws IOException */ private boolean seekOneSource(TermSource ts) throws IOException { /* - * Within this loop progress must be made in one of the following forms: - currentRow, currentTerm, or curretDocID must be increased - the given source must - * advance its iterator This loop will end when any of the following criteria are met - the iterator for the given source is pointing to the key - * (currentRow, columnFamilies[sourceID], currentTerm, currentDocID) - the given source is out of data and currentRow is set to null - the given source has - * advanced beyond the endRow and currentRow is set to null + * Within this loop progress must be made in one of the following forms: - currentRow, + * currentTerm, or currentDocID must be increased - the given source must advance its iterator + * This loop will end when any of the following criteria are met - the iterator for the given + * source is pointing to the key (currentRow, columnFamilies[sourceID], currentTerm, + * currentDocID) - the given source is out of data and currentRow is set to null - the given + * source has advanced beyond the endRow and currentRow is set to null */ - + // precondition: currentRow is not null boolean advancedCursor = false; - + while (true) { if (ts.iter.hasTop() == false) { if (log.isDebugEnabled()) { log.debug("The current iterator no longer has a top"); } - + // If we got to the end of an iterator, found a Match if it's a NOT if (ts.notFlag) { break; } - + currentRow = null; // setting currentRow to null counts as advancing the cursor return true; } - + // check if we're past the end key int endCompare = -1; - + if (log.isDebugEnabled()) { log.debug("Current topKey = " + ts.iter.getTopKey()); } - + // we should compare the row to the end of the range if (overallRange.getEndKey() != null) { if (log.isDebugEnabled()) { log.debug("II.seekOneSource overallRange.getEndKey() != null"); } - + endCompare = overallRange.getEndKey().getRow().compareTo(ts.iter.getTopKey().getRow()); - + if ((!overallRange.isEndKeyInclusive() && endCompare <= 0) || endCompare < 0) { if
(log.isDebugEnabled()) { log.debug("II.seekOneSource at the end of the tablet server"); } - + currentRow = null; - + // setting currentRow to null counts as advancing the cursor return true; } @@ -278,81 +283,84 @@ public class AndIterator implements SortedKeyValueIterator { log.debug("II.seekOneSource overallRange.getEndKey() == null"); } } - + // Compare the Row IDs int partitionCompare = currentRow.compareTo(getPartition(ts.iter.getTopKey())); if (log.isDebugEnabled()) { log.debug("Current partition: " + currentRow); } - + // check if this source is already at or beyond currentRow // if not, then seek to at least the current row if (partitionCompare > 0) { if (log.isDebugEnabled()) { log.debug("Need to seek to the current row"); - + // seek to at least the currentRow log.debug("ts.dataLocation = " + ts.dataLocation.getBytes()); log.debug("Term = " + new Text(ts.term + "\0" + currentDocID).getBytes()); } - - Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// new Text(ts.term + "\0" + currentDocID)); - + + Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// new Text(ts.term + "\0" + + // currentDocID)); + if (log.isDebugEnabled()) { log.debug("Seeking to: " + seekKey); } ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); continue; } - + // check if this source has gone beyond currentRow // if so, advance currentRow if (partitionCompare < 0) { if (log.isDebugEnabled()) { log.debug("Went too far beyond the currentRow"); } - + if (ts.notFlag) { break; } - + currentRow.set(getPartition(ts.iter.getTopKey())); currentDocID.set(emptyByteArray); - + advancedCursor = true; continue; } - + // we have verified that the current source is positioned in currentRow // now we must make sure we're in the right columnFamily in the current row if (ts.dataLocation != null) { int dataLocationCompare = ts.dataLocation.compareTo(getDataLocation(ts.iter.getTopKey())); - + if (log.isDebugEnabled()) { log.debug("Comparing dataLocations"); log.debug("dataLocation = " + ts.dataLocation); log.debug("newDataLocation = " + getDataLocation(ts.iter.getTopKey())); } - + // check if this source is already on the right columnFamily // if not, then seek forwards to the right columnFamily if (dataLocationCompare > 0) { if (log.isDebugEnabled()) { log.debug("Need to seek to the right dataLocation"); } - - Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// , new Text(ts.term + "\0" + currentDocID)); - + + Key seekKey = buildKey(currentRow, ts.dataLocation, nullText);// , new Text(ts.term + "\0" + // + currentDocID)); + if (log.isDebugEnabled()) { log.debug("Seeking to: " + seekKey); } - - ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); + + ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, + SEEK_INCLUSIVE); if (!ts.iter.hasTop()) { currentRow = null; return true; } - + continue; } // check if this source is beyond the right columnFamily @@ -361,26 +369,27 @@ public class AndIterator implements SortedKeyValueIterator { if (log.isDebugEnabled()) { log.debug("Went too far beyond the dataLocation"); } - + if (endCompare == 0) { // we're done currentRow = null; - + // setting currentRow to null counts as advancing the cursor return true; } - + // Seeking beyond the current dataLocation gives a valid negated result if (ts.notFlag) { break; } - + Key seekKey = buildFollowingPartitionKey(ts.iter.getTopKey()); - + if (log.isDebugEnabled()) { log.debug("Seeking to: " + seekKey); } - 
ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); + ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, + SEEK_INCLUSIVE); if (!ts.iter.hasTop()) { currentRow = null; return true; @@ -388,164 +397,172 @@ public class AndIterator implements SortedKeyValueIterator { continue; } } - + // Compare the Terms int termCompare = ts.term.compareTo(getTerm(ts.iter.getTopKey())); if (log.isDebugEnabled()) { log.debug("term = " + ts.term); log.debug("newTerm = " + getTerm(ts.iter.getTopKey())); } - + // We need to seek down farther into the data if (termCompare > 0) { if (log.isDebugEnabled()) { log.debug("Need to seek to the right term"); } - Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0"));// new Text(ts.term + "\0" + currentDocID)); - + Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0"));// new + // Text(ts.term + // + "\0" + + // currentDocID)); + if (log.isDebugEnabled()) { log.debug("Seeking to: " + seekKey); } - + ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); if (!ts.iter.hasTop()) { currentRow = null; return true; } - + // currentTerm = getTerm(ts.iter.getTopKey()); - + if (log.isDebugEnabled()) { log.debug("topKey after seeking to correct term: " + ts.iter.getTopKey()); } - + continue; } - - // We've jumped out of the current term, set the new term as currentTerm and start looking again + + // We've jumped out of the current term, set the new term as currentTerm and start looking + // again if (termCompare < 0) { if (log.isDebugEnabled()) { log.debug("TERM: Need to jump to the next row"); } - + if (endCompare == 0) { currentRow = null; - + return true; } - + if (ts.notFlag) { break; } - + Key seekKey = buildFollowingPartitionKey(ts.iter.getTopKey()); if (log.isDebugEnabled()) { log.debug("Using this key to find the next key: " + ts.iter.getTopKey()); log.debug("Seeking to: " + seekKey); } - + ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); - + if (!ts.iter.hasTop()) { currentRow = null; return true; } - + currentTerm = getTerm(ts.iter.getTopKey()); - + continue; } - + // Compare the DocIDs Text docid = getDocID(ts.iter.getTopKey()); int docidCompare = currentDocID.compareTo(docid); - + if (log.isDebugEnabled()) { log.debug("Comparing DocIDs"); log.debug("currentDocID = " + currentDocID); log.debug("docid = " + docid); } - + // The source isn't at the right DOC if (docidCompare > 0) { if (log.isDebugEnabled()) { log.debug("Need to seek to the correct docid"); } - + // seek forwards - Key seekKey = buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0" + currentDocID)); - + Key seekKey = + buildKey(currentRow, ts.dataLocation, new Text(ts.term + "\0" + currentDocID)); + + if (log.isDebugEnabled()) { log.debug("Seeking to: " + seekKey); } - + ts.iter.seek(new Range(seekKey, true, null, false), ts.seekColumnFamilies, SEEK_INCLUSIVE); - + continue; } - - // if this source has advanced beyond the current column qualifier then advance currentCQ and return true + + // if this source has advanced beyond the current column qualifier then advance currentCQ and + // return true if (docidCompare < 0) { if (ts.notFlag) { break; } - + if (log.isDebugEnabled()) { - log.debug("We went too far, update the currentDocID to be the location of where were seek'ed to"); + log.debug( + "We went too far, update the currentDocID to be the location of where we were seeked to"); } - + currentDocID.set(docid);
advancedCursor = true; break; } - + // Set the term as currentTerm (in case we found this record on the first try) currentTerm = getTerm(ts.iter.getTopKey()); - + if (log.isDebugEnabled()) { log.debug("currentTerm = " + currentTerm); } - + // If we're negated, next() the first TermSource since we guaranteed it was not a NOT term if (ts.notFlag) { sources[0].iter.next(); advancedCursor = true; } - + // If we got here, we have a match break; } - + return advancedCursor; } - + + @Override public void next() throws IOException { if (log.isDebugEnabled()) { log.debug("In ModifiedIntersectingIterator.next()"); } - + if (currentRow == null) { return; } - + // precondition: the current row is set up and the sources all have the same column qualifier // while we don't have a match, seek in the source with the smallest column qualifier sources[0].iter.next(); - + advanceToIntersection(); - + if (hasTop()) { if (overallRange != null && !overallRange.contains(topKey)) { topKey = null; } } } - + protected void advanceToIntersection() throws IOException { if (log.isDebugEnabled()) { log.debug("In AndIterator.advanceToIntersection()"); } - + boolean cursorChanged = true; while (cursorChanged) { // seek all of the sources to at least the highest seen column qualifier in the current row @@ -561,61 +578,62 @@ public class AndIterator implements SortedKeyValueIterator { } } } - + topKey = buildKey(currentRow, currentTerm, currentDocID); - + if (log.isDebugEnabled()) { log.debug("ModifiedIntersectingIterator: Got a match: " + topKey); } } - + public static String stringTopKey(SortedKeyValueIterator iter) { if (iter.hasTop()) { return iter.getTopKey().toString(); } return ""; } - + public static final String columnFamiliesOptionName = "columnFamilies"; public static final String termValuesOptionName = "termValues"; public static final String notFlagsOptionName = "notFlags"; - + /** * Encode a Text array of all the columns to intersect on - * + * * @param columns * The columns to be encoded * @return A Base64 encoded string (using a \n delimiter) of all columns to intersect on. */ public static String encodeColumns(Text[] columns) { StringBuilder sb = new StringBuilder(); - for (int i = 0; i < columns.length; i++) { - sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(columns[i])))); + for (Text column : columns) { + sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(column)))); sb.append('\n'); } return sb.toString(); } - + /** - * Encode a Text array of all of the terms to intersect on. The terms should match the columns in a one-to-one manner - * + * Encode a Text array of all of the terms to intersect on. The terms should match + * the columns in a one-to-one manner + * * @param terms * The terms to be encoded * @return A Base64 encoded string (using a \n delimiter) of all terms to intersect on. 
*/ public static String encodeTermValues(Text[] terms) { StringBuilder sb = new StringBuilder(); - for (int i = 0; i < terms.length; i++) { - sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(terms[i])))); + for (Text term : terms) { + sb.append(new String(Base64.encodeBase64(TextUtil.getBytes(term)))); sb.append('\n'); } - + return sb.toString(); } - + /** * Encode an array of booleans denoting which columns are NOT'ed - * + * * @param flags * The array of NOTs * @return A base64 encoded string of which columns are NOT'ed @@ -631,10 +649,10 @@ public class AndIterator implements SortedKeyValueIterator { } return new String(Base64.encodeBase64(bytes)); } - + /** * Decode the encoded columns into a Text array - * + * * @param columns * The Base64 encoded String of the columns * @return A Text array of the decoded columns @@ -645,13 +663,13 @@ public class AndIterator implements SortedKeyValueIterator { for (int i = 0; i < columnStrings.length; i++) { columnTexts[i] = new Text(Base64.decodeBase64(columnStrings[i].getBytes())); } - + return columnTexts; } - + /** * Decode the encoded terms into a Text array - * + * * @param terms * The Base64 encoded String of the terms * @return A Text array of decoded terms. @@ -662,14 +680,13 @@ public class AndIterator implements SortedKeyValueIterator { for (int i = 0; i < termStrings.length; i++) { termTexts[i] = new Text(Base64.decodeBase64(termStrings[i].getBytes())); } - + return termTexts; } - + /** * Decode the encoded NOT flags into a boolean array - * - * @param flags + * * @return A boolean array of decoded NOT flags */ public static boolean[] decodeBooleans(String flags) { @@ -677,7 +694,7 @@ public class AndIterator implements SortedKeyValueIterator { if (flags == null) { return null; } - + byte[] bytes = Base64.decodeBase64(flags.getBytes()); boolean[] bFlags = new boolean[bytes.length]; for (int i = 0; i < bytes.length; i++) { @@ -687,23 +704,25 @@ public class AndIterator implements SortedKeyValueIterator { bFlags[i] = false; } } - + return bFlags; } - - public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + + @Override + public void init(SortedKeyValueIterator source, Map options, + IteratorEnvironment env) throws IOException { if (log.isDebugEnabled()) { log.debug("In AndIterator.init()"); } - + Text[] dataLocations = decodeColumns(options.get(columnFamiliesOptionName)); Text[] terms = decodeTermValues(options.get(termValuesOptionName)); boolean[] notFlags = decodeBooleans(options.get(notFlagsOptionName)); - + if (terms.length < 2) { throw new IllegalArgumentException("AndIterator requires two or more column families"); } - + // Scan the not flags. // There must be at least one term that isn't negated // And we are going to re-order such that the first term is not a !
term @@ -713,8 +732,9 @@ public class AndIterator implements SortedKeyValueIterator { notFlags[i] = false; } } - - // Make sure that the first dataLocation/Term is not a NOT by swapping it with a later dataLocation/Term + + // Make sure that the first dataLocation/Term is not a NOT by swapping it with a later + // dataLocation/Term if (notFlags[0]) { for (int i = 1; i < notFlags.length; i++) { if (notFlags[i] == false) { @@ -722,34 +742,37 @@ public class AndIterator implements SortedKeyValueIterator { Text swap = new Text(terms[0]); terms[0].set(terms[i]); terms[i].set(swap); - + // Swap the dataLocations swap.set(dataLocations[0]); dataLocations[0].set(dataLocations[i]); dataLocations[i].set(swap); - + // Flip the notFlags notFlags[0] = false; notFlags[i] = true; break; } } - + if (notFlags[0]) { - throw new IllegalArgumentException("AndIterator requires at least one column family without not"); + throw new IllegalArgumentException( + "AndIterator requires at least one column family without not"); } } - + // Build up the array of sources that are to be intersected sources = new TermSource[dataLocations.length]; for (int i = 0; i < dataLocations.length; i++) { sources[i] = new TermSource(source.deepCopy(env), dataLocations[i], terms[i], notFlags[i]); } - + sourcesCount = dataLocations.length; } - - public void seek(Range range, Collection seekColumnFamilies, boolean inclusive) throws IOException { + + @Override + public void seek(Range range, Collection seekColumnFamilies, boolean inclusive) + throws IOException { if (log.isDebugEnabled()) { log.debug("In AndIterator.seek()"); log.debug("AndIterator.seek Given range => " + range); @@ -758,7 +781,7 @@ public class AndIterator implements SortedKeyValueIterator { currentDocID.set(emptyByteArray); doSeek(range); } - + private void doSeek(Range range) throws IOException { overallRange = new Range(range); @@ -766,7 +789,7 @@ public class AndIterator implements SortedKeyValueIterator { if (range.getEndKey() != null && range.getEndKey().getRow() != null) { this.parentEndRow = range.getEndKey().getRow(); } - + // seek each of the sources to the right column family within the row given by key for (int i = 0; i < sourcesCount; i++) { Key sourceKey; @@ -775,22 +798,25 @@ public class AndIterator implements SortedKeyValueIterator { // Build a key with the DocID if one is given if (range.getStartKey().getColumnFamily() != null) { sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation, - (sources[i].term == null) ? nullText : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily())); + (sources[i].term == null) ? nullText + : new Text(sources[i].term + "\0" + range.getStartKey().getColumnFamily())); } // Build a key with just the term. else { sourceKey = buildKey(getPartition(range.getStartKey()), dataLocation, (sources[i].term == null) ? 
nullText : sources[i].term); } - if (!range.isStartKeyInclusive()) + if (!range.isStartKeyInclusive()) { sourceKey = sourceKey.followingKey(PartialKey.ROW_COLFAM_COLQUAL); - sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies, SEEK_INCLUSIVE); + } + sources[i].iter.seek(new Range(sourceKey, true, null, false), sources[i].seekColumnFamilies, + SEEK_INCLUSIVE); } else { sources[i].iter.seek(range, sources[i].seekColumnFamilies, SEEK_INCLUSIVE); } } - + advanceToIntersection(); - + if (hasTop()) { if (overallRange != null && !overallRange.contains(topKey)) { topKey = null; @@ -800,12 +826,14 @@ public class AndIterator implements SortedKeyValueIterator { } } } - - public void addSource(SortedKeyValueIterator source, IteratorEnvironment env, Text term, boolean notFlag) { + + public void addSource(SortedKeyValueIterator source, IteratorEnvironment env, + Text term, boolean notFlag) { addSource(source, env, null, term, notFlag); } - - public void addSource(SortedKeyValueIterator source, IteratorEnvironment env, Text dataLocation, Text term, boolean notFlag) { + + public void addSource(SortedKeyValueIterator source, IteratorEnvironment env, + Text dataLocation, Text term, boolean notFlag) { // Check if we have space for the added Source if (sources == null) { sources = new TermSource[1]; @@ -821,45 +849,47 @@ public class AndIterator implements SortedKeyValueIterator { } sources = localSources; } - + sources[sourcesCount] = new TermSource(source.deepCopy(env), dataLocation, term, notFlag); sourcesCount++; } - + public boolean jump(Key jumpKey) throws IOException { if (log.isDebugEnabled()) { log.debug("jump: " + jumpKey); } - + // is the jumpKey outside my overall range? if (parentEndRow != null && parentEndRow.compareTo(jumpKey.getRow()) < 0) { // can't go there. if (log.isDebugEnabled()) { - log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); + log.debug( + "jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); } return false; } - + if (!hasTop()) { // TODO: will need to add current/last row if you want to measure if // we don't have topkey because we hit end of tablet. - + if (log.isDebugEnabled()) { log.debug("jump called, but topKey is null, must need to move to next row"); } return false; } else { - + int comp = this.topKey.getRow().compareTo(jumpKey.getRow()); // compare rows if (comp > 0) { if (log.isDebugEnabled()) { log.debug("jump, our row is ahead of jumpKey."); - log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow" + parentEndRow); + log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow" + + parentEndRow); } return hasTop(); // do nothing, we're ahead of jumpKey row } else if (comp < 0) { // a row behind jump key, need to move forward - + if (log.isDebugEnabled()) { log.debug("II jump, row jump"); } @@ -881,14 +911,14 @@ public class AndIterator implements SortedKeyValueIterator { } else { log.debug("myUid: " + myUid); } - + if (jumpUid == null) { log.debug("jumpUid is null"); } else { log.debug("jumpUid: " + jumpUid); } } - + int ucomp = myUid.compareTo(jumpUid); if (ucomp < 0) { // need to move all sources forward if (log.isDebugEnabled()) { @@ -898,7 +928,7 @@ public class AndIterator implements SortedKeyValueIterator { Range range = new Range(row); this.currentRow = row; this.currentDocID = new Text(this.getUID(jumpKey)); - + doSeek(range); // make sure it is in the range if we have one. 
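The Base64 helpers above, together with init() earlier in this file, form the configuration contract for AndIterator: the column families, term values, and NOT flags each travel as one encoded option string. A minimal client-side sketch of building those options, mirroring what BooleanLogicIterator.createIntersectingIterator does later in this patch (the families and terms here are invented examples; init() requires at least two terms and a non-negated first term):

import java.util.HashMap;
import java.util.Map;
import org.apache.accumulo.examples.wikisearch.iterator.AndIterator;
import org.apache.hadoop.io.Text;

public class AndIteratorOptionsSketch {
  public static Map<String, String> buildOptions() {
    // Two example term sources; the second is negated (a NOT term).
    Text[] families = {new Text("fi\0TEXT"), new Text("fi\0TITLE")};
    Text[] terms = {new Text("accumulo"), new Text("hadoop")};
    boolean[] notFlags = {false, true};

    Map<String, String> opts = new HashMap<>();
    opts.put(AndIterator.columnFamiliesOptionName, AndIterator.encodeColumns(families));
    opts.put(AndIterator.termValuesOptionName, AndIterator.encodeTermValues(terms));
    opts.put(AndIterator.notFlagsOptionName, AndIterator.encodeBooleans(notFlags));
    return opts; // handed to AndIterator.init(...) when the iterator is applied to a scan
  }
}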
@@ -909,8 +939,8 @@ public class AndIterator implements SortedKeyValueIterator { log.debug("jump, topKey is now: " + topKey); } return hasTop(); - - }// else do nothing + + } // else do nothing if (hasTop() && parentEndRow != null && topKey.getRow().compareTo(parentEndRow) > 0) { topKey = null; } diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java index 1324006..81317e7 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/BooleanLogicIterator.java @@ -65,11 +65,12 @@ import org.apache.log4j.Logger; import com.google.common.collect.Multimap; public class BooleanLogicIterator implements SortedKeyValueIterator, OptionDescriber { - - private static final Collection EMPTY_COL_FAMS = new ArrayList(); + + private static final Collection EMPTY_COL_FAMS = new ArrayList<>(); protected static final Logger log = Logger.getLogger(BooleanLogicIterator.class); public static final String QUERY_OPTION = "expr"; - public static final String TERM_CARDINALITIES = "TERM_CARDINALITIES"; // comma separated list of term : count + public static final String TERM_CARDINALITIES = "TERM_CARDINALITIES"; // comma separated list of + // term : count public static final String FIELD_INDEX_QUERY = "FIELD_INDEX_QUERY"; public static final String FIELD_NAME_PREFIX = "fi\0"; // -------------------------------------------------------------------------- @@ -80,55 +81,58 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, private SortedKeyValueIterator sourceIterator; private BooleanLogicTreeNode root; private PriorityQueue positives; - private ArrayList negatives = new ArrayList(); + private ArrayList negatives = new ArrayList<>(); private ArrayList rangerators; private String updatedQuery; - private Map termCardinalities = new HashMap(); + private Map termCardinalities = new HashMap<>(); private Range overallRange = null; private FieldIndexKeyParser keyParser; - + public BooleanLogicIterator() { keyParser = new FieldIndexKeyParser(); - rangerators = new ArrayList(); + rangerators = new ArrayList<>(); } - + public BooleanLogicIterator(BooleanLogicIterator other, IteratorEnvironment env) { if (other.sourceIterator != null) { this.sourceIterator = other.sourceIterator.deepCopy(env); } keyParser = new FieldIndexKeyParser(); - rangerators = new ArrayList(); + rangerators = new ArrayList<>(); log.debug("Congratulations, you've reached the BooleanLogicIterator"); } - + public static void setLogLevel(Level lev) { log.setLevel(lev); } - + public void setDebug(Level lev) { log.setLevel(lev); } - + + @Override public SortedKeyValueIterator deepCopy(IteratorEnvironment env) { return new BooleanLogicIterator(this, env); } - + /** - * init is responsible for setting up the iterator. It will pull the serialized boolean parse tree from the options mapping and construct the - * appropriate sub-iterators - * - * Once initialized, this iterator will automatically seek to the first matching instance. If no top key exists, that means an event matching the boolean - * logic did not exist in the partition. Subsequent calls to next will move the iterator and all sub-iterators to the next match. - * + * init is responsible for setting up the iterator. 
It will pull the serialized boolean + * parse tree from the options mapping and construct the appropriate sub-iterators + * + * Once initialized, this iterator will automatically seek to the first matching instance. If no + * top key exists, that means an event matching the boolean logic did not exist in the partition. + * Subsequent calls to next will move the iterator and all sub-iterators to the next match. + * * @param source * The underlying SortedkeyValueIterator. * @param options * A Map of options. * @param env * The iterator environment - * @throws IOException */ - public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + @Override + public void init(SortedKeyValueIterator source, Map options, + IteratorEnvironment env) throws IOException { validateOptions(options); try { if (log.isDebugEnabled()) { @@ -136,7 +140,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } // Copy the source iterator sourceIterator = source.deepCopy(env); - + // Potentially take advantage of term cardinalities String[] terms = null; if (null != options.get(TERM_CARDINALITIES)) { @@ -148,14 +152,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + // Step 1: Parse the query if (log.isDebugEnabled()) { log.debug("QueryParser"); } QueryParser qp = new QueryParser(); qp.execute(this.updatedQuery); // validateOptions updates the updatedQuery - + // need to build the query tree based on jexl parsing. // Step 2: refactor QueryTree - inplace modification if (log.isDebugEnabled()) { @@ -163,17 +167,17 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } TreeNode tree = qp.getIteratorTree(); this.root = transformTreeNode(tree); - + if (log.isDebugEnabled()) { log.debug("refactorTree"); } this.root = refactorTree(this.root); - + if (log.isDebugEnabled()) { log.debug("collapseBranches"); } collapseBranches(root); - + // Step 3: create iterators where we need them. createIteratorTree(this.root); if (log.isDebugEnabled()) { @@ -181,18 +185,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } // Step 4: split the positive and negative leaves splitLeaves(this.root); - + } catch (ParseException ex) { log.error("ParseException in init: " + ex); throw new IllegalArgumentException("Failed to parse query", ex); } catch (Exception ex) { throw new IllegalArgumentException("probably had no indexed terms", ex); } - + } - - /* ************************************************************************* - * Methods for sub iterator creation. + + /* + * ************************************************************************* Methods for sub + * iterator creation. */ private void createIteratorTree(BooleanLogicTreeNode root) throws IOException { if (log.isDebugEnabled()) { @@ -201,7 +206,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // Walk the tree, if all of your children are leaves, roll you into the // appropriate iterator. Enumeration dfe = root.depthFirstEnumeration(); - + while (dfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement(); if (!node.isLeaf() && node.getType() != ParserTreeConstants.JJTJEXLSCRIPT) { @@ -217,29 +222,32 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, node.setUserObject(createOrIterator(node)); } else { // throw an error. 
- log.debug("createIteratorTree, encounterd a node type I do not know about: " + node.getType()); + log.debug("createIteratorTree, encounterd a node type I do not know about: " + + node.getType()); log.debug("createIteratorTree, node contents: " + node.getContents()); } node.removeAllChildren(); } } } - + // now for remaining leaves, create basic iterators. // you can add in specialized iterator mappings here if necessary. dfe = root.depthFirstEnumeration(); while (dfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement(); - if (node.isLeaf() && node.getType() != ParserTreeConstants.JJTANDNODE && node.getType() != ParserTreeConstants.JJTORNODE) { + if (node.isLeaf() && node.getType() != ParserTreeConstants.JJTANDNODE + && node.getType() != ParserTreeConstants.JJTORNODE) { node.setUserObject(createFieldIndexIterator(node)); } } } - + private AndIterator createIntersectingIterator(BooleanLogicTreeNode node) throws IOException { if (log.isDebugEnabled()) { log.debug("createIntersectingIterator(node)"); - log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator()); + log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + + " , operator: " + node.getFieldOperator()); } Text[] columnFamilies = new Text[node.getChildCount()]; Text[] termValues = new Text[node.getChildCount()]; @@ -253,72 +261,77 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, negationMask[i] = child.isNegated(); i++; } - + AndIterator ii = new AndIterator(); - Map options = new HashMap(); + Map options = new HashMap<>(); options.put(AndIterator.columnFamiliesOptionName, AndIterator.encodeColumns(columnFamilies)); options.put(AndIterator.termValuesOptionName, AndIterator.encodeTermValues(termValues)); options.put(AndIterator.notFlagsOptionName, AndIterator.encodeBooleans(negationMask)); - + ii.init(sourceIterator.deepCopy(env), options, env); return ii; } - + private OrIterator createOrIterator(BooleanLogicTreeNode node) throws IOException { if (log.isDebugEnabled()) { log.debug("createOrIterator(node)"); - log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator()); + log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + + " , operator: " + node.getFieldOperator()); } - + Enumeration children = node.children(); - ArrayList fams = new ArrayList(); - ArrayList quals = new ArrayList(); + ArrayList fams = new ArrayList<>(); + ArrayList quals = new ArrayList<>(); while (children.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); fams.add(child.getFieldName()); quals.add(child.getFieldValue()); } - + OrIterator iter = new OrIterator(); SortedKeyValueIterator source = sourceIterator.deepCopy(env); for (int i = 0; i < fams.size(); i++) { iter.addTerm(source, fams.get(i), quals.get(i), env); } - + return iter; } - + /* - * This takes the place of the SortedKeyIterator used previously. This iterator is bound to the partitioned table structure. When next is called it will jump - * rows as necessary internally versus needing to do it externally as was the case with the SortedKeyIterator. + * This takes the place of the SortedKeyIterator used previously. This iterator is bound to the + * partitioned table structure. 
When next is called it will jump rows as necessary internally + * versus needing to do it externally as was the case with the SortedKeyIterator. */ - private FieldIndexIterator createFieldIndexIterator(BooleanLogicTreeNode node) throws IOException { + private FieldIndexIterator createFieldIndexIterator(BooleanLogicTreeNode node) + throws IOException { if (log.isDebugEnabled()) { log.debug("BoolLogic.createFieldIndexIterator()"); - log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + " , operator: " + node.getFieldOperator()); + log.debug("fName: " + node.getFieldName() + " , fValue: " + node.getFieldValue() + + " , operator: " + node.getFieldOperator()); } Text rowId = null; sourceIterator.seek(new Range(), EMPTY_COL_FAMS, false); if (sourceIterator.hasTop()) { rowId = sourceIterator.getTopKey().getRow(); } - - FieldIndexIterator iter = new FieldIndexIterator(node.getType(), rowId, node.getFieldName(), node.getFieldValue(), node.isNegated(), - node.getFieldOperator()); - - Map options = new HashMap(); + + FieldIndexIterator iter = new FieldIndexIterator(node.getType(), rowId, node.getFieldName(), + node.getFieldValue(), node.isNegated(), node.getFieldOperator()); + + Map options = new HashMap<>(); iter.init(sourceIterator.deepCopy(env), options, env); if (log.isDebugEnabled()) { FieldIndexIterator.setLogLevel(Level.DEBUG); } else { FieldIndexIterator.setLogLevel(Level.OFF); } - + return iter; } - - /* ************************************************************************* - * Methods for testing the tree WRT boolean logic. + + /* + * ************************************************************************* Methods for testing + * the tree WRT boolean logic. */ // After all iterator pointers have been advanced, test if the current // record passes the boolean logic. @@ -330,13 +343,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, while (dfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) dfe.nextElement(); if (!node.isLeaf()) { - + int type = node.getType(); if (type == ParserTreeConstants.JJTANDNODE) { // BooleanLogicTreeNode.NodeType.AND) { handleAND(node); } else if (type == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { handleOR(node); - } else if (type == ParserTreeConstants.JJTJEXLSCRIPT) {// BooleanLogicTreeNode.NodeType.HEAD) { + } else if (type == ParserTreeConstants.JJTJEXLSCRIPT) {// BooleanLogicTreeNode.NodeType.HEAD) + // { handleHEAD(node); } else if (type == ParserTreeConstants.JJTNOTNODE) { // BooleanLogicTreeNode.NodeType.NOT) { // there should not be any "NOT"s. 
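A minimal sketch of the tree test the hunks around here implement, assuming hypothetical Node/Type classes and ignoring the top-key bookkeeping and negation handling in the real iterator: validity folds bottom-up, with AND requiring every child valid and OR requiring at least one.

    import java.util.ArrayList;
    import java.util.List;

    /** Toy model of testTreeState: AND/OR internal nodes over leaves. */
    public class LogicTreeSketch {

      enum Type { AND, OR, LEAF }

      static class Node {
        final Type type;
        boolean valid; // for a LEAF, stands in for "the sub-iterator has a top key"
        final List<Node> children = new ArrayList<>();

        Node(Type type, boolean valid) {
          this.type = type;
          this.valid = valid;
        }

        // Re-test validity bottom-up, the way the iterator re-walks the tree
        // after its sub-iterators have been advanced.
        boolean test() {
          if (type == Type.LEAF) {
            return valid;
          }
          boolean result = (type == Type.AND); // AND starts true, OR starts false
          for (Node c : children) {
            result = (type == Type.AND) ? (result && c.test()) : (result || c.test());
          }
          valid = result;
          return result;
        }
      }

      public static void main(String[] args) {
        Node root = new Node(Type.AND, false);
        Node or = new Node(Type.OR, false);
        or.children.add(new Node(Type.LEAF, false));
        or.children.add(new Node(Type.LEAF, true));
        root.children.add(or);
        root.children.add(new Node(Type.LEAF, true));
        System.out.println(root.test()); // (false OR true) AND true -> true
      }
    }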
@@ -344,14 +358,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } else { // it is a leaf, if it is an AND or OR do something - if (node.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { //OrIterator + if (node.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { + // //OrIterator node.setValid(node.hasTop()); node.reSet(); node.addToSet(node.getTopKey()); - - } else if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTEQNODE - || node.getType() == ParserTreeConstants.JJTERNODE || node.getType() == ParserTreeConstants.JJTLENODE - || node.getType() == ParserTreeConstants.JJTLTNODE || node.getType() == ParserTreeConstants.JJTGENODE + + } else if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTEQNODE + || node.getType() == ParserTreeConstants.JJTERNODE + || node.getType() == ParserTreeConstants.JJTLENODE + || node.getType() == ParserTreeConstants.JJTLTNODE + || node.getType() == ParserTreeConstants.JJTGENODE || node.getType() == ParserTreeConstants.JJTGTNODE) { // sub iterator guarantees it is in its internal range, // otherwise, no top. @@ -359,58 +377,69 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + if (log.isDebugEnabled()) { - log.debug("BoolLogic.testTreeState end, treeState:: " + this.root.getContents() + " , valid: " + root.isValid()); + log.debug("BoolLogic.testTreeState end, treeState:: " + this.root.getContents() + + " , valid: " + root.isValid()); } return this.root.isValid(); } - + private void handleHEAD(BooleanLogicTreeNode node) { Enumeration children = node.children(); while (children.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); - - if (child.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) { + + if (child.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) + // { node.setValid(child.isValid()); node.setTopKey(child.getTopKey()); - } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { + } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) + // { node.setValid(child.isValid()); node.setTopKey(child.getTopKey()); - } else if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTERNODE - || child.getType() == ParserTreeConstants.JJTGTNODE || child.getType() == ParserTreeConstants.JJTGENODE - || child.getType() == ParserTreeConstants.JJTLTNODE || child.getType() == ParserTreeConstants.JJTLENODE) {// BooleanLogicTreeNode.NodeType.SEL) { + } else if (child.getType() == ParserTreeConstants.JJTEQNODE + || child.getType() == ParserTreeConstants.JJTERNODE + || child.getType() == ParserTreeConstants.JJTGTNODE + || child.getType() == ParserTreeConstants.JJTGENODE + || child.getType() == ParserTreeConstants.JJTLTNODE + || child.getType() == ParserTreeConstants.JJTLENODE) {// BooleanLogicTreeNode.NodeType.SEL) + // { node.setValid(true); node.setTopKey(child.getTopKey()); if (child.getTopKey() == null) { node.setValid(false); } } - }// end while - + } // end while + // I have to be valid AND have a top key if (node.isValid() && !node.hasTop()) { node.setValid(false); } } - + private void handleAND(BooleanLogicTreeNode me) { if (log.isDebugEnabled()) { log.debug("handleAND::" + me.getContents()); } Enumeration children = 
me.children(); me.setValid(true); // it's easier to prove false than true - - HashSet goodSet = new HashSet(); - HashSet badSet = new HashSet(); + + HashSet goodSet = new HashSet<>(); + HashSet badSet = new HashSet<>(); while (children.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); - - if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTANDNODE - || child.getType() == ParserTreeConstants.JJTERNODE || child.getType() == ParserTreeConstants.JJTNENODE - || child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTLENODE - || child.getType() == ParserTreeConstants.JJTGTNODE || child.getType() == ParserTreeConstants.JJTLTNODE) { - + + if (child.getType() == ParserTreeConstants.JJTEQNODE + || child.getType() == ParserTreeConstants.JJTANDNODE + || child.getType() == ParserTreeConstants.JJTERNODE + || child.getType() == ParserTreeConstants.JJTNENODE + || child.getType() == ParserTreeConstants.JJTGENODE + || child.getType() == ParserTreeConstants.JJTLENODE + || child.getType() == ParserTreeConstants.JJTGTNODE + || child.getType() == ParserTreeConstants.JJTLTNODE) { + if (child.isNegated()) { if (child.hasTop()) { badSet.add(child.getTopKey()); @@ -436,7 +465,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, me.setValid(false); return; } - + // if good set is empty, add it. if (goodSet.isEmpty()) { if (log.isDebugEnabled()) { @@ -448,17 +477,20 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // if either fails, I'm false. if (!goodSet.contains(child.getTopKey())) { if (log.isDebugEnabled()) { - log.debug("handleAND, goodSet is not empty, and does NOT contain child, setting false. child: " + child.getContents()); + log.debug( + "handleAND, goodSet is not empty, and does NOT contain child, setting false. child: " + + child.getContents()); } me.setValid(false); return; } else { // trim the good set to this one value // (handles the case where the initial encounters were ORs) - goodSet = new HashSet(); + goodSet = new HashSet<>(); goodSet.add(child.getTopKey()); if (log.isDebugEnabled()) { - log.debug("handleAND, child in goodset, trim to this value: " + child.getContents()); + log.debug( + "handleAND, child in goodset, trim to this value: " + child.getContents()); } } } @@ -488,9 +520,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - - } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { - + + } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) + // { + // NOTE: The OR may be an OrIterator in which case it will only produce // a single unique identifier, or it may be a pure logical construct and // be capable of producing multiple unique identifiers. @@ -501,7 +534,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, boolean pureNegations = true; if (!child.isValid()) { if (log.isDebugEnabled()) { - log.debug("handleAND, child is an OR and it is not valid, setting false, ALL NEGATED?: " + child.isChildrenAllNegated()); + log.debug("handleAND, child is an OR and it is not valid, setting false, ALL NEGATED?: " + + child.isChildrenAllNegated()); } me.setValid(false); // I'm an AND if one of my children is false, I'm false. return; @@ -537,7 +571,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + // is the goodSet still empty?
that means there were only negations // otherwise, if it's not empty and we didn't match one, false if (child.isNegated()) { @@ -558,16 +592,16 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, me.setValid(false); return; } - + // we matched something, trim the set. // i.e. two child ORs goodSet = child.getIntersection(goodSet); } } - + } - }// end while - + } // end while + if (goodSet.isEmpty()) { // && log.isDebugEnabled()) { if (log.isDebugEnabled()) { log.debug("handleAND-> goodSet is empty, pure negations?"); @@ -579,7 +613,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + private void handleOR(BooleanLogicTreeNode me) { Enumeration children = me.children(); // I'm an OR node, need at least one positive. @@ -591,12 +625,16 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // 3 cases for child: SEL, AND, OR // and negation BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); - if (child.getType() == ParserTreeConstants.JJTEQNODE || child.getType() == ParserTreeConstants.JJTNENODE - || child.getType() == ParserTreeConstants.JJTANDNODE || child.getType() == ParserTreeConstants.JJTERNODE - || child.getType() == ParserTreeConstants.JJTNRNODE || child.getType() == ParserTreeConstants.JJTLENODE - || child.getType() == ParserTreeConstants.JJTLTNODE || child.getType() == ParserTreeConstants.JJTGENODE + if (child.getType() == ParserTreeConstants.JJTEQNODE + || child.getType() == ParserTreeConstants.JJTNENODE + || child.getType() == ParserTreeConstants.JJTANDNODE + || child.getType() == ParserTreeConstants.JJTERNODE + || child.getType() == ParserTreeConstants.JJTNRNODE + || child.getType() == ParserTreeConstants.JJTLENODE + || child.getType() == ParserTreeConstants.JJTLTNODE + || child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTGTNODE) { - + if (child.hasTop()) { if (child.isNegated()) { // do nothing. @@ -612,8 +650,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // that child could be pure negations in which case I'm true me.setValid(child.isValid()); } - - } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { + + } else if (child.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) + // { if (child.hasTop()) { if (!child.isNegated()) { allNegated = false; @@ -634,8 +673,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - }// end while - + } // end while + if (allNegated) { // do all my children have top? children = me.children(); @@ -648,7 +687,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } me.setValid(false); - + } else { Key k = me.getMinUniqueID(); if (k == null) { @@ -659,9 +698,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - - /* ************************************************************************* - * Utility methods. + + /* + * ************************************************************************* Utility methods. */ // Transforms the TreeNode tree of query.parser into the // BooleanLogicTreeNodeJexl form.
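A minimal sketch of that transformation for a single equality term, assuming a hypothetical TermLeaf type; the "fi\0" value for FIELD_NAME_PREFIX is an assumption, while the quote-stripping and "!=" negation mirror the hunks that follow.

    /** Hypothetical stand-in for the leaf fields of a BooleanLogicTreeNode. */
    public class TermLeaf {
      final String fieldName;
      final String fieldValue;
      final boolean negated;

      TermLeaf(String fieldName, String fieldValue, boolean negated) {
        this.fieldName = fieldName;
        this.fieldValue = fieldValue;
        this.negated = negated;
      }

      // Mirrors the equality-term handling: strip single quotes from the value,
      // treat "!=" as a negated equality, and prefix the field name so it lines
      // up with the field-index column family.
      static TermLeaf fromEqualityTerm(String fieldName, String operator, String rawValue) {
        String value = rawValue.replace("'", "");
        boolean negated = operator.equals("!=");
        String prefix = "fi\0"; // assumed value of FIELD_NAME_PREFIX
        if (!fieldName.startsWith(prefix)) {
          fieldName = prefix + fieldName;
        }
        return new TermLeaf(fieldName, value, negated);
      }

      public static void main(String[] args) {
        TermLeaf leaf = TermLeaf.fromEqualityTerm("TITLE", "!=", "'asturias'");
        System.out.println(leaf.fieldName.substring(3) + " " + leaf.fieldValue + " " + leaf.negated);
        // prints: TITLE asturias true
      }
    }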
@@ -670,11 +709,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, if (log.isDebugEnabled()) { log.debug("Equals Node"); } - + Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); - + for (QueryTerm t : values) { if (null == t || null == t.getValue()) { continue; @@ -682,21 +721,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, String fValue = t.getValue().toString(); fValue = fValue.replace("'", ""); boolean negated = t.getOperator().equals("!="); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } - BooleanLogicTreeNode child = new BooleanLogicTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated); + BooleanLogicTreeNode child = + new BooleanLogicTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated); return child; } } } - + if (node.getType().equals(ASTERNode.class) || node.getType().equals(ASTNRNode.class)) { if (log.isDebugEnabled()) { log.debug("Regex Node"); } - + Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); @@ -707,23 +747,24 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, String fValue = t.getValue().toString(); fValue = fValue.replaceAll("'", ""); boolean negated = node.getType().equals(ASTNRNode.class); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } - - BooleanLogicTreeNode child = new BooleanLogicTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated); + + BooleanLogicTreeNode child = + new BooleanLogicTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated); return child; } } } - - if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) || node.getType().equals(ASTGTNode.class) - || node.getType().equals(ASTGENode.class)) { + + if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) + || node.getType().equals(ASTGTNode.class) || node.getType().equals(ASTGENode.class)) { Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } @@ -733,9 +774,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } String fValue = t.getValue().toString(); fValue = fValue.replaceAll("'", "").toLowerCase(); - boolean negated = false; // to be negated, must be child of Not, which is handled elsewhere. + boolean negated = false; // to be negated, must be child of Not, which is handled + // elsewhere. int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - + BooleanLogicTreeNode child = new BooleanLogicTreeNode(mytype, fName, fValue, negated); if (log.isDebugEnabled()) { log.debug("adding child node: " + child.getContents()); @@ -744,11 +786,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + BooleanLogicTreeNode returnNode = null; - + if (node.getType().equals(ASTAndNode.class) || node.getType().equals(ASTOrNode.class)) { - int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE : ParserTreeConstants.JJTORNODE; + int parentType = node.getType().equals(ASTAndNode.class) ? 
ParserTreeConstants.JJTANDNODE + : ParserTreeConstants.JJTORNODE; if (log.isDebugEnabled()) { log.debug("AND/OR node: " + parentType); } @@ -777,7 +820,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } else { returnNode = new BooleanLogicTreeNode(parentType); - + } } else if (node.getType().equals(ASTNotNode.class)) { if (log.isDebugEnabled()) { @@ -788,7 +831,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } @@ -800,7 +843,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, fValue = fValue.replaceAll("'", "").toLowerCase(); boolean negated = !t.getOperator().equals("!="); int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } @@ -810,8 +853,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } else { returnNode = new BooleanLogicTreeNode(ParserTreeConstants.JJTNOTNODE); } - } else if (node.getType().equals(ASTJexlScript.class) || node.getType().getSimpleName().equals("RootNode")) { - + } else if (node.getType().equals(ASTJexlScript.class) + || node.getType().getSimpleName().equals("RootNode")) { + if (log.isDebugEnabled()) { log.debug("ROOT/JexlScript node"); } @@ -821,7 +865,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); - + if (!fName.startsWith(FIELD_NAME_PREFIX)) { fName = FIELD_NAME_PREFIX + fName; } @@ -833,7 +877,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, fValue = fValue.replaceAll("'", "").toLowerCase(); boolean negated = t.getOperator().equals("!="); int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); - + BooleanLogicTreeNode child = new BooleanLogicTreeNode(mytype, fName, fValue, negated); returnNode.add(child); return returnNode; @@ -843,38 +887,41 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, returnNode = new BooleanLogicTreeNode(ParserTreeConstants.JJTJEXLSCRIPT); } } else { - log.error("Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType()); + log.error( + "Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType()); } for (TreeNode child : node.getChildren()) { returnNode.add(transformTreeNode(child)); } - + return returnNode; } - + // After tree conflicts have been resolved, we can collapse branches where // leaves have been pruned. public static void collapseBranches(BooleanLogicTreeNode myroot) throws Exception { - + // NOTE: doing a depth first enumeration didn't work when I started // removing nodes halfway through. The following method does work, // it's essentially a reverse breadth first traversal.
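    // (Illustration, not in the original comment: a single-child AND/OR is
    // removed and its child promoted, so AND( OR( EQ ), EQ ) collapses to
    // AND( EQ, EQ ); an empty, non-range AND/OR is dropped from its parent.)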
- List nodes = new ArrayList(); + List nodes = new ArrayList<>(); Enumeration bfe = myroot.breadthFirstEnumeration(); - + while (bfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement(); nodes.add(node); } - + // walk backwards for (int i = nodes.size() - 1; i >= 0; i--) { BooleanLogicTreeNode node = nodes.get(i); if (log.isDebugEnabled()) { - log.debug("collapseBranches, inspecting node: " + node.toString() + " " + node.printNode()); + log.debug( + "collapseBranches, inspecting node: " + node.toString() + " " + node.printNode()); } - - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { if (node.getChildCount() == 0 && !node.isRangeNode()) { node.removeFromParent(); } else if (node.getChildCount() == 1) { @@ -882,7 +929,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, BooleanLogicTreeNode c = (BooleanLogicTreeNode) node.getFirstChild(); node.removeFromParent(); p.add(c); - + } } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { if (node.getChildCount() == 0) { @@ -893,26 +940,27 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + } - + public BooleanLogicTreeNode refactorTree(BooleanLogicTreeNode myroot) { - List nodes = new ArrayList(); + List nodes = new ArrayList<>(); Enumeration bfe = myroot.breadthFirstEnumeration(); - + while (bfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement(); nodes.add(node); } - + // walk backwards for (int i = nodes.size() - 1; i >= 0; i--) { BooleanLogicTreeNode node = nodes.get(i); - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { // 1. check to see if all children are negated // 2. check to see if we have to handle ranges. - - Map ranges = new HashMap(); + + Map ranges = new HashMap<>(); Enumeration children = node.children(); boolean allNegated = true; while (children.hasMoreElements()) { @@ -921,7 +969,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, allNegated = false; // break; } - + // currently we are not allowing unbounded ranges, so they must sit under an AND node. if (node.getType() == ParserTreeConstants.JJTANDNODE) { // check for ranges @@ -979,10 +1027,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, if (allNegated) { node.setChildrenAllNegated(true); } - + // see if the AND node had a range. 
if (node.getType() == ParserTreeConstants.JJTANDNODE) { - + // if(ranges.containsKey(node.getFieldName())){ if (!ranges.isEmpty()) { // we have a range, process it @@ -994,7 +1042,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, node.setType(ParserTreeConstants.JJTORNODE); node.removeAllChildren(); // RangeBounds rb = ranges.get(node.getFieldName()); - + for (Entry entry : ranges.entrySet()) { Text fName = entry.getKey(); RangeBounds rb = entry.getValue(); @@ -1004,41 +1052,44 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, node.setUpperBound(rb.getUpper()); node.setRangeNode(true); } - + rangerators.add(node); - + if (log.isDebugEnabled()) { log.debug("refactor: " + node.getContents()); log.debug("refactor: " + node.getLowerBound() + " " + node.getUpperBound()); } - + } else { if (log.isDebugEnabled()) { log.debug("AND range more than 2 children"); } // node has range plus other children, create another node from the range // remove lt,le,gt,ge from parent and push in a single node - + // removing nodes via enumeration doesn't work, push into a list // and walk backwards - List temp = new ArrayList(); + List temp = new ArrayList<>(); Enumeration e = node.children(); while (e.hasMoreElements()) { BooleanLogicTreeNode c = (BooleanLogicTreeNode) e.nextElement(); temp.add(c); } - + for (int j = temp.size() - 1; j >= 0; j--) { BooleanLogicTreeNode c = temp.get(j); - if (c.getType() == JexlOperatorConstants.JJTLENODE || c.getType() == JexlOperatorConstants.JJTLTNODE - || c.getType() == JexlOperatorConstants.JJTGENODE || c.getType() == JexlOperatorConstants.JJTGTNODE) { + if (c.getType() == JexlOperatorConstants.JJTLENODE + || c.getType() == JexlOperatorConstants.JJTLTNODE + || c.getType() == JexlOperatorConstants.JJTGENODE + || c.getType() == JexlOperatorConstants.JJTGTNODE) { c.removeFromParent(); } } - + for (Entry entry : ranges.entrySet()) { Text fName = entry.getKey(); - BooleanLogicTreeNode nchild = new BooleanLogicTreeNode(ParserTreeConstants.JJTORNODE, fName.toString(), ""); + BooleanLogicTreeNode nchild = + new BooleanLogicTreeNode(ParserTreeConstants.JJTORNODE, fName.toString(), ""); RangeBounds rb = entry.getValue(); nchild.setFieldValue(new Text("")); nchild.setLowerBound(rb.getLower()); @@ -1047,21 +1098,21 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, node.add(nchild); rangerators.add(nchild); } - + if (log.isDebugEnabled()) { log.debug("refactor: " + node.getContents()); } } } } - + } } - + return myroot; - + } - + // If all children are of type SEL, roll this up into an AND or OR node. 
private static boolean canRollUp(BooleanLogicTreeNode parent) { if (log.isDebugEnabled()) { @@ -1076,21 +1127,23 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, Enumeration e = parent.children(); while (e.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) e.nextElement(); - + if (child.getType() != ParserTreeConstants.JJTEQNODE) {// BooleanLogicTreeNode.NodeType.SEL) { if (log.isDebugEnabled()) { - log.debug("canRollUp: child.getType -> " + ParserTreeConstants.jjtNodeName[child.getType()] + " int: " + child.getType() + " return false"); + log.debug( + "canRollUp: child.getType -> " + ParserTreeConstants.jjtNodeName[child.getType()] + + " int: " + child.getType() + " return false"); } return false; } - + if (child.isNegated()) { if (log.isDebugEnabled()) { log.debug("canRollUp: child.isNegated, return false"); } return false; } - + if (child.getFieldValue().toString().contains("*")) { if (log.isDebugEnabled()) { log.debug("canRollUp: child has wildcard: " + child.getFieldValue()); @@ -1100,10 +1153,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } return true; } - + /** - * Small utility function to print out the depth-first enumeration of the tree. Specify the root or sub root of the tree you wish to view. - * + * Small utility function to print out the depth-first enumeration of the tree. Specify the root + * or sub root of the tree you wish to view. + * * @param root * The root node of the tree or sub-tree. */ @@ -1117,7 +1171,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, System.out.println(i + " : " + n); } } - + public static void showBreadthFirstTraversal(BooleanLogicTreeNode root) { System.out.println("BreadthFirstTraversal"); log.debug("BooleanLogicIterator.showBreadthFirstTraversal()"); @@ -1130,19 +1184,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug(i + " : " + n); } } - + private void splitLeaves(BooleanLogicTreeNode node) { if (log.isDebugEnabled()) { log.debug("BoolLogic: splitLeaves()"); } - positives = new PriorityQueue(10, new BooleanLogicTreeNodeComparator()); + positives = new PriorityQueue<>(10, new BooleanLogicTreeNodeComparator()); // positives = new ArrayList(); negatives.clear(); - + Enumeration dfe = node.depthFirstEnumeration(); while (dfe.hasMoreElements()) { BooleanLogicTreeNode elem = (BooleanLogicTreeNode) dfe.nextElement(); - + if (elem.isLeaf()) { if (elem.isNegated()) { negatives.add(elem); @@ -1152,7 +1206,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + private void reHeapPriorityQueue(BooleanLogicTreeNode node) { positives.clear(); Enumeration dfe = node.depthFirstEnumeration(); @@ -1165,23 +1219,27 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } - /* ************************************************************************* - * The iterator interface methods. + /* + * ************************************************************************* The iterator + * interface methods. */ + @Override public boolean hasTop() { return (topKey != null); } - + + @Override public Key getTopKey() { if (log.isDebugEnabled()) { log.debug("getTopKey: " + topKey); } return topKey; } - + private void setTopKey(Key key) { if (this.overallRange != null && key != null) { - if (overallRange.getEndKey() != null) { // if null end key, that means range is to the end of the tablet. 
+ if (overallRange.getEndKey() != null) { // if null end key, that means range is to the end of + // the tablet. if (!this.overallRange.contains(key)) { topKey = null; return; @@ -1190,21 +1248,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } topKey = key; } - + + @Override public Value getTopValue() { if (topValue == null) { topValue = new Value(new byte[0]); } return topValue; } - + private void resetNegatives() { for (BooleanLogicTreeNode neg : negatives) { neg.setTopKey(null); neg.setValid(true); } } - + private String getEventKeyUid(Key k) { if (k == null || k.getColumnFamily() == null) { return null; @@ -1212,7 +1271,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, return k.getColumnFamily().toString(); } } - + private String getIndexKeyUid(Key k) { try { int idx = 0; @@ -1223,17 +1282,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, return null; } } - + /* - * Remember, the Key in the BooleanLogicTreeNode is different structurally than the Key in its sub iterator because the key BooleanLogic needs to return is an - * event key created from the index key (which is what the sub iterators are looking at!) + * Remember, the Key in the BooleanLogicTreeNode is different structurally than the Key in its sub + * iterator because the key BooleanLogic needs to return is an event key created from the index + * key (which is what the sub iterators are looking at!) */ private Key getOptimizedAdvanceKey() throws IOException { if (log.isDebugEnabled()) { log.debug("getOptimizedAdvanceKey() called"); } Enumeration bfe = root.breadthFirstEnumeration(); - ArrayList bfl = new ArrayList(); + ArrayList bfl = new ArrayList<>(); while (bfe.hasMoreElements()) { BooleanLogicTreeNode node = (BooleanLogicTreeNode) bfe.nextElement(); if (!node.isNegated()) { @@ -1242,7 +1302,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, bfl.add(node); } } - + // walk the tree backwards for (int i = bfl.size() - 1; i >= 0; i--) { if (bfl.get(i).isLeaf() || bfl.get(i).isNegated()) { @@ -1251,7 +1311,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } continue; } - + BooleanLogicTreeNode node = bfl.get(i); node.setDone(false); if (log.isDebugEnabled()) { @@ -1264,11 +1324,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, boolean firstTime = true; while (children.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); - + if (child.isNegated() || child.isChildrenAllNegated()) { continue; } - + // all advance keys were initially set from topkey for the leaves. 
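    // Note: the "advance key" is the smallest key a subtree could accept next.
    // The AND block below keeps the maximum of its children's advance keys,
    // the OR block keeps the minimum, and the HEAD node turns the result into
    // the single jump target used by next().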
if (child.getAdvanceKey() == null) { log.debug("\tchild does not advance key: " + child.printNode()); @@ -1278,7 +1338,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } else { log.debug("\tchild advanceKey: " + child.getAdvanceKey()); } - + if (firstTime) { firstTime = false; max = child; @@ -1287,10 +1347,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } continue; } - + log.debug("\tAND block, max: " + max); log.debug("\tAND block, child: " + child); - + // first test row if (max.getAdvanceKey().getRow().compareTo(child.getAdvanceKey().getRow()) < 0) { max = child; @@ -1299,7 +1359,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } continue; } - + // if rows are equal, test uids String uid_max = getEventKeyUid(max.getAdvanceKey()); String uid_child = getEventKeyUid(child.getAdvanceKey()); @@ -1315,7 +1375,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug("\tuid_child: " + uid_child); } } - + if (uid_max != null && uid_child != null) { if (uid_max.compareTo(uid_child) < 0) { max = child; @@ -1340,7 +1400,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } node.setDone(true); } - + } else if (node.getType() == ParserTreeConstants.JJTORNODE) { // get min BooleanLogicTreeNode min = null; @@ -1348,10 +1408,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, boolean firstTime = true; int numChildren = node.getChildCount(); int allChildrenDone = 0; - + while (children.hasMoreElements()) { BooleanLogicTreeNode child = (BooleanLogicTreeNode) children.nextElement(); - + if (log.isDebugEnabled()) { log.debug("\tOR block start, child: " + child); } @@ -1376,16 +1436,17 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, break; } } - + if (child.getAdvanceKey() == null) { log.debug("\tOR child doesn't have top or an AdvanceKey"); continue; } if (firstTime) { if (log.isDebugEnabled()) { - log.debug("\tOR block, first valid node, min=child: " + child + " advanceKey: " + child.getAdvanceKey()); + log.debug("\tOR block, first valid node, min=child: " + child + " advanceKey: " + + child.getAdvanceKey()); } - + firstTime = false; min = child; continue; @@ -1394,21 +1455,22 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug("\tOR block, min: " + min); log.debug("\tOR block, child: " + child); } - if (min.getAdvanceKey().getRow().toString().compareTo(child.getAdvanceKey().getRow().toString()) > 0) { + if (min.getAdvanceKey().getRow().toString() + .compareTo(child.getAdvanceKey().getRow().toString()) > 0) { // child row is less than min, set min to child min = child; if (log.isDebugEnabled()) { log.debug("\tmin row was greater than child, min=child: " + min); } continue; - + } else if (min.getAdvanceKey().getRow().compareTo(child.getAdvanceKey().getRow()) < 0) { // min row is less child, skip if (log.isDebugEnabled()) { log.debug("\tmin row less than childs, keep min: " + min); } continue; - + } else { // they're equal, test uids String uid_min = getEventKeyUid(min.getAdvanceKey()); String uid_child = getEventKeyUid(child.getAdvanceKey()); @@ -1417,7 +1479,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } if (uid_min != null && uid_child != null) { if (uid_min.compareTo(uid_child) > 0) { - + min = child; if (log.isDebugEnabled()) { log.debug("\tuid_min > uid_child, set min to child: " + min); @@ -1430,11 +1492,11 @@ public class BooleanLogicIterator implements 
SortedKeyValueIterator, min = child; } } - }// end while + } // end while if (log.isDebugEnabled()) { log.debug("attemptOptimization: OR with children, min: " + min); } - + if (min != null) { if (log.isDebugEnabled()) { log.debug("OR block, min != null, advanceKey? " + min.getAdvanceKey()); @@ -1445,13 +1507,13 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, node.setAdvanceKey(null); node.setDone(true); } - + } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { // HEAD node if (log.isDebugEnabled()) { log.debug("getOptimizedAdvanceKey, HEAD node"); } BooleanLogicTreeNode child = (BooleanLogicTreeNode) node.getFirstChild(); - + if (child.isDone()) { if (log.isDebugEnabled()) { log.debug("Head node's child is done, need to move to the next row"); @@ -1472,21 +1534,23 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, k = new Key(row); child.setAdvanceKey(k); } - + } if (log.isDebugEnabled()) { log.debug("advance Key: " + child.getAdvanceKey()); } - Key key = new Key(child.getAdvanceKey().getRow(), child.getAdvanceKey().getColumnFamily(), child.getAdvanceKey().getColumnFamily()); + Key key = new Key(child.getAdvanceKey().getRow(), child.getAdvanceKey().getColumnFamily(), + child.getAdvanceKey().getColumnFamily()); return key; - - }// end else - }// end for + + } // end else + } // end for return null; } - + /* - * The incoming jump key has been formatted into the structure of an index key, but the leaves are eventkeys + * The incoming jump key has been formatted into the structure of an index key, but the leaves are + * eventkeys */ private boolean jump(Key jumpKey) throws IOException { if (log.isDebugEnabled()) { @@ -1497,7 +1561,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, BooleanLogicTreeNode n = (BooleanLogicTreeNode) bfe.nextElement(); n.setAdvanceKey(null); } // now advance all nodes to the advance key - + if (log.isDebugEnabled()) { log.debug("jump, All leaves need to advance to: " + jumpKey); } @@ -1512,7 +1576,8 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } return ok; } - + + @Override @SuppressWarnings("unused") public void next() throws IOException { if (log.isDebugEnabled()) { @@ -1524,12 +1589,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, setTopKey(null); return; } - + Key previousJumpKey = null; while (!finished) { - + Key jumpKey = this.getOptimizedAdvanceKey(); - + if (jumpKey == null) { // stop? 
if (log.isDebugEnabled()) { log.debug("next(), jump key is null, stopping"); @@ -1537,7 +1602,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, setTopKey(null); return; } - + if (log.isDebugEnabled()) { if (jumpKey != null) { log.debug("next(), jumpKey: " + jumpKey); @@ -1545,31 +1610,33 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug("jumpKey is null"); } } - + boolean same = false; if (jumpKey != null && topKey != null) { // check that the uid's are not the same same = getIndexKeyUid(jumpKey).equals(getEventKeyUid(topKey)); if (log.isDebugEnabled()) { - log.debug("jumpKeyUid: " + getIndexKeyUid(jumpKey) + " topKeyUid: " + getEventKeyUid(topKey)); + log.debug( + "jumpKeyUid: " + getIndexKeyUid(jumpKey) + " topKeyUid: " + getEventKeyUid(topKey)); } } - + if (log.isDebugEnabled()) { log.debug("previousJumpKey: " + previousJumpKey); log.debug("current JumpKey: " + jumpKey); } - + if (jumpKey != null && !this.overallRange.contains(jumpKey)) { if (log.isDebugEnabled()) { - log.debug("jumpKey is outside of range, that means the next key is out of range, stopping"); + log.debug( + "jumpKey is outside of range, that means the next key is out of range, stopping"); log.debug("jumpKey: " + jumpKey + " overallRange.endKey: " + overallRange.getEndKey()); } // stop setTopKey(null); return; } - + boolean previousSame = false; if (previousJumpKey != null && jumpKey != null) { previousSame = previousJumpKey.equals(jumpKey); @@ -1580,7 +1647,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, previousJumpKey = jumpKey; ok = jump(jumpKey); // attempt to jump everybody forward to this row and uid. // tryJump = false; - + // now test the tree state. if (testTreeState()) { Key tempKey = root.getTopKey(); @@ -1591,7 +1658,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, continue; } } - + if (root.getTopKey().equals(tempKey)) { // it's valid set nextKey and make sure it's not the same as topKey. if (log.isDebugEnabled()) { @@ -1600,10 +1667,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } else { log.debug("next, this.root.getTopKey() is null"); } - + if (topKey != null) { log.debug("topKey->" + topKey); - + } else { log.debug("topKey is null"); } @@ -1615,11 +1682,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + // -------------------------------------- // Regular next block } else { - + reHeapPriorityQueue(this.root); BooleanLogicTreeNode node; @@ -1628,13 +1695,13 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, if (!node.isDone() && node.hasTop()) { break; } - + if (positives.isEmpty()) { setTopKey(null); return; } } - + if (log.isDebugEnabled()) { if (jumpKey == null) { log.debug("no jump, jumpKey is null"); @@ -1648,7 +1715,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } node.next(); resetNegatives(); - + if (!node.hasTop()) { // it may be part of an or, so it could be ok. node.setValid(false); @@ -1665,7 +1732,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, finished = true; return; } - + } else { setTopKey(this.root.getTopKey()); return; @@ -1673,12 +1740,12 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } else { - + if (overallRange.contains(node.getTopKey())) { // the node had something so push it back into priority queue positives.add(node); } - + // now test the tree state. 
if (testTreeState()) { Key tempKey = root.getTopKey(); @@ -1689,7 +1756,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, continue; } } - + if (root.getTopKey().equals(tempKey)) { // it's valid set nextKey and make sure it's not the same as topKey. if (log.isDebugEnabled()) { @@ -1698,10 +1765,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } else { log.debug("next, this.root.getTopKey() is null"); } - + if (topKey != null) { log.debug("topKey->" + topKey); - + } else { log.debug("topKey is null"); } @@ -1724,9 +1791,9 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + } - + // is the priority queue empty? if (positives.isEmpty()) { finished = true; @@ -1735,7 +1802,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - + /* * create a range for the given row of the */ @@ -1745,17 +1812,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } Text rowID = k.getRow(); Text colFam = k.getColumnFamily(); - + for (BooleanLogicTreeNode neg : negatives) { - Key startKey = new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam)); - Key endKey = new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam + "\1")); + Key startKey = + new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam)); + Key endKey = + new Key(rowID, neg.getFieldName(), new Text(neg.getFieldValue() + "\0" + colFam + "\1")); Range range = new Range(startKey, true, endKey, false); - + if (log.isDebugEnabled()) { log.debug("range: " + range); } neg.seek(range, EMPTY_COL_FAMS, false); - + if (neg.hasTop()) { neg.setValid(false); } @@ -1768,8 +1837,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } } } - - public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { + + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) + throws IOException { this.overallRange = range; if (log.isDebugEnabled()) { log.debug("seek, overallRange: " + overallRange); @@ -1778,11 +1849,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // NOTE: All of our iterators exist in the leaves. topKey = null; root.setTopKey(null); - + // set up the range iterators for the given seek range. // these should exist in the positives as OR iterators, but need special setup. setupRangerators(range); - + // don't take this out, if you jump rows on the tablet you could have // pulled nodes out of the positives priority queue. On a call to seek // it is usually jumping rows, so everything needs to become possibly @@ -1799,11 +1870,11 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug("leaf: " + node.getContents() + " topKey: " + tk); } } - + // Now that all nodes have been seek'd recreate the priorityQueue to sort them properly. 
splitLeaves(this.root); resetNegatives(); - + // test Tree, if it's not valid, call next if (testTreeState() && overallRange.contains(root.getTopKey())) { if (!negatives.isEmpty()) { @@ -1813,9 +1884,10 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, next(); } } - + if (log.isDebugEnabled()) { - log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains " + overallRange.contains(this.root.getTopKey())); + log.debug("overallRange " + overallRange + " topKey " + this.root.getTopKey() + " contains " + + overallRange.contains(this.root.getTopKey())); } if (overallRange.contains(this.root.getTopKey()) && this.root.isValid()) { @@ -1828,14 +1900,14 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, // seek failed in the logic test, but there may be other possible // values which satisfy the logic tree. Make sure our iterators aren't // all null, and then call next. - + // if(!root.hasTop()){ if (log.isDebugEnabled()) { log.debug("seek, testTreeState is false, HEAD(root) does not have top"); } // check nodes in positives to see if they're all null/outside range // or if nothing percolated up to root yet. - List removals = new ArrayList(); + List removals = new ArrayList<>(); for (BooleanLogicTreeNode node : positives) { if (!node.hasTop() || !overallRange.contains(node.getTopKey())) { removals.add(node); @@ -1848,29 +1920,31 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, return; } } - + private int compare(Key k1, Key k2) { if (k1 != null && k2 != null) { return k1.compareTo(k2); } else if (k1 == null && k2 == null) { return 0; - } else if (k1 == null) { // in this case, null is considered bigger b/c it's closer to the end of the table. + } else if (k1 == null) { // in this case, null is considered bigger b/c it's closer to the end + // of the table. 
return 1; } else { return -1; } } - + private void setupRangerators(Range range) throws IOException { if (rangerators == null || rangerators.isEmpty()) { return; } for (BooleanLogicTreeNode node : rangerators) { - Set fValues = new HashSet(); + Set fValues = new HashSet<>(); OrIterator orIter = new OrIterator(); SortedKeyValueIterator siter = sourceIterator.deepCopy(env); // create UniqFieldNameValueIterator to find uniq field names values - UniqFieldNameValueIterator uniq = new UniqFieldNameValueIterator(node.getFieldName(), node.getLowerBound(), node.getUpperBound()); + UniqFieldNameValueIterator uniq = new UniqFieldNameValueIterator(node.getFieldName(), + node.getLowerBound(), node.getUpperBound()); uniq.setSource(siter); uniq.seek(range, EMPTY_COL_FAMS, false); while (uniq.hasTop()) { @@ -1891,18 +1965,19 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } node.setUserObject(orIter); } - + } - - /* ************************************************************************* - * Inner classes + + /* + * ************************************************************************* Inner classes */ public class BooleanLogicTreeNodeComparator implements Comparator { - + + @Override public int compare(Object o1, Object o2) { BooleanLogicTreeNode n1 = (BooleanLogicTreeNode) o1; BooleanLogicTreeNode n2 = (BooleanLogicTreeNode) o2; - + Key k1 = n1.getTopKey(); Key k2 = n2.getTopKey(); if (log.isDebugEnabled()) { @@ -1917,7 +1992,7 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, log.debug("BooleanLogicTreeNodeComparator \tt1: " + t1 + " t2: " + t2); } // return t1.compareTo(t2); - + if (k1 != null && k2 != null) { return k1.compareTo(k2); } else if (k1 == null && k2 == null) { @@ -1927,15 +2002,18 @@ public class BooleanLogicIterator implements SortedKeyValueIterator, } else { return -1; } - + } } - + + @Override public IteratorOptions describeOptions() { - return new IteratorOptions(getClass().getSimpleName(), "evaluates event objects against an expression", Collections.singletonMap(QUERY_OPTION, - "query expression"), null); + return new IteratorOptions(getClass().getSimpleName(), + "evaluates event objects against an expression", + Collections.singletonMap(QUERY_OPTION, "query expression"), null); } - + + @Override public boolean validateOptions(Map options) { if (!options.containsKey(QUERY_OPTION)) { return false; diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java index 6783efe..f83de46 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/DefaultIteratorEnvironment.java @@ -16,31 +16,33 @@ */ package org.apache.accumulo.examples.wikisearch.iterator; -import org.apache.accumulo.core.conf.AccumuloConfiguration; +import java.io.IOException; + import org.apache.accumulo.core.client.sample.SamplerConfiguration; +import org.apache.accumulo.core.conf.AccumuloConfiguration; +import org.apache.accumulo.core.conf.DefaultConfiguration; import org.apache.accumulo.core.data.Key; import org.apache.accumulo.core.data.Value; import org.apache.accumulo.core.iterators.IteratorEnvironment; +import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; import org.apache.accumulo.core.iterators.SortedKeyValueIterator; import 
org.apache.accumulo.core.iterators.system.MapFileIterator; +import org.apache.accumulo.core.security.Authorizations; import org.apache.accumulo.core.util.CachedConfiguration; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; -import org.apache.accumulo.core.security.Authorizations; -import org.apache.accumulo.core.iterators.IteratorUtil.IteratorScope; - -import java.io.IOException; public class DefaultIteratorEnvironment implements IteratorEnvironment { AccumuloConfiguration conf; public DefaultIteratorEnvironment() { - this.conf = AccumuloConfiguration.getDefaultConfiguration(); + this.conf = DefaultConfiguration.getInstance(); } @Override - public SortedKeyValueIterator reserveMapFileReader(String mapFileName) throws IOException { + public SortedKeyValueIterator reserveMapFileReader(String mapFileName) + throws IOException { Configuration conf = CachedConfiguration.getInstance(); FileSystem fs = FileSystem.get(conf); return new MapFileIterator(this.conf, fs, mapFileName, conf); @@ -85,4 +87,9 @@ public class DefaultIteratorEnvironment implements IteratorEnvironment { public IteratorEnvironment cloneWithSamplingEnabled() { throw new UnsupportedOperationException(); } + + @Override + public boolean isUserCompaction() { + return false; + } } diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java index b2a0c83..5ade156 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/EvaluatingIterator.java @@ -33,69 +33,70 @@ import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue; import org.apache.commons.collections.map.LRUMap; import org.apache.hadoop.io.Text; - public class EvaluatingIterator extends AbstractEvaluatingIterator { - + public static final String NULL_BYTE_STRING = "\u0000"; LRUMap visibilityMap = new LRUMap(); - + public EvaluatingIterator() { super(); } - + public EvaluatingIterator(AbstractEvaluatingIterator other, IteratorEnvironment env) { super(other, env); } - + + @Override public SortedKeyValueIterator deepCopy(IteratorEnvironment env) { return new EvaluatingIterator(this, env); } - + @Override public PartialKey getKeyComparator() { return PartialKey.ROW_COLFAM; } - + @Override public Key getReturnKey(Key k) { - // If we were using column visibility, then we would get the merged visibility here and use it in the key. + // If we were using column visibility, then we would get the merged visibility here and use it + // in the key. // Remove the COLQ from the key and use the combined visibility - Key r = new Key(k.getRowData().getBackingArray(), k.getColumnFamilyData().getBackingArray(), NULL_BYTE, k.getColumnVisibility().getBytes(), - k.getTimestamp(), k.isDeleted(), false); + Key r = new Key(k.getRowData().getBackingArray(), k.getColumnFamilyData().getBackingArray(), + NULL_BYTE, k.getColumnVisibility().getBytes(), k.getTimestamp(), k.isDeleted(), false); return r; } - + @Override public void fillMap(EventFields event, Key key, Value value) { // If we were using column visibility, we would have to merge them here. - + // Pull the datatype from the colf in case we need to do anything datatype specific. 
// String colf = key.getColumnFamily().toString(); // String datatype = colf.substring(0, colf.indexOf(NULL_BYTE_STRING)); - + // For the partitioned table, the field name and field value are stored in the column qualifier // separated by a \0. String colq = key.getColumnQualifier().toString();// .toLowerCase(); int idx = colq.indexOf(NULL_BYTE_STRING); String fieldName = colq.substring(0, idx); String fieldValue = colq.substring(idx + 1); - + event.put(fieldName, new FieldValue(getColumnVisibility(key), fieldValue.getBytes())); } /** - * @param key * @return The column visibility */ public ColumnVisibility getColumnVisibility(Key key) { ColumnVisibility result = (ColumnVisibility) visibilityMap.get(key.getColumnVisibility()); - if (result != null) + if (result != null) { return result; + } result = new ColumnVisibility(key.getColumnVisibility().getBytes()); visibilityMap.put(key.getColumnVisibility(), result); return result; } - + /** * Don't accept this key if the colf starts with 'fi' */ @@ -105,11 +106,12 @@ public class EvaluatingIterator extends AbstractEvaluatingIterator { Key copy = new Key(key.getRow(), new Text("fi\01")); Collection columnFamilies = Collections.emptyList(); this.iterator.seek(new Range(copy, copy), columnFamilies, true); - if (this.iterator.hasTop()) + if (this.iterator.hasTop()) { return isKeyAccepted(this.iterator.getTopKey()); + } return true; } return true; } - + } diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java index 78c8576..53752c0 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/iterator/OrIterator.java @@ -34,13 +34,14 @@ import org.apache.hadoop.io.Text; import org.apache.log4j.Logger; /** - * An iterator that handles "OR" query constructs on the server side. This code has been adapted/merged from Heap and Multi Iterators. + * An iterator that handles "OR" query constructs on the server side. This code has been + * adapted/merged from Heap and Multi Iterators. 
*/ public class OrIterator implements SortedKeyValueIterator { - + private TermSource currentTerm; private ArrayList sources; - private PriorityQueue sorted = new PriorityQueue(5); + private PriorityQueue sorted = new PriorityQueue<>(5); private static final Text nullText = new Text(); private Key topKey = null; private Range overallRange; @@ -48,9 +49,9 @@ public class OrIterator implements SortedKeyValueIterator { private boolean inclusive; protected static final Logger log = Logger.getLogger(OrIterator.class); private Text parentEndRow; - + protected static class TermSource implements Comparable { - + public SortedKeyValueIterator iter; public Text dataLocation; public Text term; @@ -58,40 +59,40 @@ public class OrIterator implements SortedKeyValueIterator { public Text fieldTerm; public Key topKey; public boolean atEnd; - + public TermSource(TermSource other) { this.iter = other.iter; this.term = other.term; this.dataLocation = other.dataLocation; this.atEnd = other.atEnd; } - + public TermSource(SortedKeyValueIterator iter, Text term) { this.iter = iter; this.term = term; this.atEnd = false; } - + public TermSource(SortedKeyValueIterator iter, Text dataLocation, Text term) { this.iter = iter; this.dataLocation = dataLocation; this.term = term; this.atEnd = false; } - + public void setNew() { if (!this.atEnd && this.iter.hasTop()) { this.topKey = this.iter.getTopKey(); - + if (log.isDebugEnabled()) { log.debug("OI.TermSource.setNew TS.iter.topKey >>" + topKey + "<<"); } - + if (this.term == null) { this.docid = this.topKey.getColumnQualifier(); } else { String cqString = this.topKey.getColumnQualifier().toString(); - + int idx = cqString.indexOf("\0"); this.fieldTerm = new Text(cqString.substring(0, idx)); this.docid = new Text(cqString.substring(idx + 1)); @@ -100,7 +101,7 @@ public class OrIterator implements SortedKeyValueIterator { if (log.isDebugEnabled()) { log.debug("OI.TermSource.setNew Setting to null..."); } - + // this.term = null; // this.dataLocation = null; this.topKey = null; @@ -108,7 +109,8 @@ public class OrIterator implements SortedKeyValueIterator { this.docid = null; } } - + + @Override public int compareTo(TermSource o) { // NOTE: If your implementation can have more than one row in a tablet, // you must compare row key here first, then column qualifier. @@ -116,14 +118,14 @@ public class OrIterator implements SortedKeyValueIterator { // sorted after they have been determined to be valid. // return this.docid.compareTo(o.docid); // return this.topKey.compareTo(o.topKey); - + // NOTE! We need to compare UID's, not Keys! 
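    // Note: the UID is the portion of the column qualifier after the first
    // null byte (see getUID below); comparing whole Keys would sort sources by
    // dataLocation and term before UID and break the document-ordered merge.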
Key k1 = topKey; Key k2 = o.topKey; // return t1.compareTo(t2); String uid1 = getUID(k1); String uid2 = getUID(k2); - + if (uid1 != null && uid2 != null) { return uid1.compareTo(uid2); } else if (uid1 == null && uid2 == null) { @@ -133,107 +135,104 @@ public class OrIterator implements SortedKeyValueIterator { } else { return -1; } - + } - + @Override public String toString() { return "TermSource: " + this.dataLocation + " " + this.term; } - + public boolean hasTop() { return this.topKey != null; } } - + /** * Returns the given key's row - * - * @param key + * * @return The given key's row */ protected Text getPartition(Key key) { return key.getRow(); } - + /** * Returns the given key's dataLocation - * - * @param key + * * @return The given key's dataLocation */ protected Text getDataLocation(Key key) { return key.getColumnFamily(); } - + /** * Returns the given key's term - * - * @param key + * * @return The given key's term */ protected Text getTerm(Key key) { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return new Text(sKey.substring(0, idx)); } - + /** * Returns the given key's DocID - * - * @param key + * * @return The given key's DocID */ protected Text getDocID(Key key) { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return new Text(sKey.substring(idx + 1)); } - + /** * Returns the given key's UID - * - * @param key + * * @return The given key's UID */ static protected String getUID(Key key) { try { int idx = 0; String sKey = key.getColumnQualifier().toString(); - + idx = sKey.indexOf("\0"); return sKey.substring(idx + 1); } catch (Exception e) { return null; } } - + public OrIterator() { - this.sources = new ArrayList(); + this.sources = new ArrayList<>(); } - + private OrIterator(OrIterator other, IteratorEnvironment env) { - this.sources = new ArrayList(); - + this.sources = new ArrayList<>(); + for (TermSource TS : other.sources) { this.sources.add(new TermSource(TS.iter.deepCopy(env), TS.dataLocation, TS.term)); } } - + + @Override public SortedKeyValueIterator deepCopy(IteratorEnvironment env) { return new OrIterator(this, env); } - - public void addTerm(SortedKeyValueIterator source, Text term, IteratorEnvironment env) { + + public void addTerm(SortedKeyValueIterator source, Text term, + IteratorEnvironment env) { if (log.isDebugEnabled()) { log.debug("OI.addTerm Added source w/o family"); log.debug("OI.addTerm term >>" + term + "<<"); } - + // Don't deepcopy an iterator if (term == null) { this.sources.add(new TermSource(source, term)); @@ -241,13 +240,14 @@ public class OrIterator implements SortedKeyValueIterator { this.sources.add(new TermSource(source.deepCopy(env), term)); } } - - public void addTerm(SortedKeyValueIterator source, Text dataLocation, Text term, IteratorEnvironment env) { + + public void addTerm(SortedKeyValueIterator source, Text dataLocation, Text term, + IteratorEnvironment env) { if (log.isDebugEnabled()) { log.debug("OI.addTerm Added source "); log.debug("OI.addTerm family >>" + dataLocation + "<< term >>" + term + "<<"); } - + // Don't deepcopy an iterator if (term == null) { this.sources.add(new TermSource(source, dataLocation, term)); @@ -255,60 +255,65 @@ public class OrIterator implements SortedKeyValueIterator { this.sources.add(new TermSource(source.deepCopy(env), dataLocation, term)); } } - + /** * Construct the topKey given the current TermSource - * - * @param TS + * * @return The top Key for a given TermSource */ protected Key 
buildTopKey(TermSource TS) { if ((TS == null) || (TS.topKey == null)) { return null; } - + if (log.isDebugEnabled()) { - log.debug("OI.buildTopKey New topKey >>" + new Key(TS.topKey.getRow(), TS.dataLocation, TS.docid) + "<<"); + log.debug("OI.buildTopKey New topKey >>" + + new Key(TS.topKey.getRow(), TS.dataLocation, TS.docid) + "<<"); } - + return new Key(TS.topKey.getRow(), TS.topKey.getColumnFamily(), TS.topKey.getColumnQualifier()); } - + + @Override final public void next() throws IOException { if (log.isDebugEnabled()) { - log.debug("OI.next Enter: sorted.size = " + sorted.size() + " currentTerm = " + ((currentTerm == null) ? "null" : "not null")); + log.debug("OI.next Enter: sorted.size = " + sorted.size() + " currentTerm = " + + ((currentTerm == null) ? "null" : "not null")); } - + if (currentTerm == null) { if (log.isDebugEnabled()) { log.debug("OI.next currentTerm is NULL... returning"); } - + topKey = null; return; } - + // Advance currentTerm currentTerm.iter.next(); - + advanceToMatch(currentTerm); - + currentTerm.setNew(); - + // See if currentTerm is still valid, remove if not if (log.isDebugEnabled()) { - log.debug("OI.next Checks (correct = 0,0,0): " + ((currentTerm.topKey != null) ? "0," : "1,") + ((currentTerm.dataLocation != null) ? "0," : "1,") - + ((currentTerm.term != null && currentTerm.fieldTerm != null) ? (currentTerm.term.compareTo(currentTerm.fieldTerm)) : "0")); + log.debug("OI.next Checks (correct = 0,0,0): " + ((currentTerm.topKey != null) ? "0," : "1,") + + ((currentTerm.dataLocation != null) ? "0," : "1,") + + ((currentTerm.term != null && currentTerm.fieldTerm != null) + ? (currentTerm.term.compareTo(currentTerm.fieldTerm)) : "0")); } - - if (currentTerm.topKey == null || ((currentTerm.dataLocation != null) && (currentTerm.term.compareTo(currentTerm.fieldTerm) != 0))) { + + if (currentTerm.topKey == null || ((currentTerm.dataLocation != null) + && (currentTerm.term.compareTo(currentTerm.fieldTerm) != 0))) { if (log.isDebugEnabled()) { log.debug("OI.next removing entry:" + currentTerm.term); } - + currentTerm = null; } - + // optimization. // if size == 0, currentTerm is the only item left, // OR there are no items left. @@ -321,58 +326,62 @@ public class OrIterator implements SortedKeyValueIterator { // and get the current top item out. currentTerm = sorted.poll(); } - + if (log.isDebugEnabled()) { log.debug("OI.next CurrentTerm is " + ((currentTerm == null) ? "null" : currentTerm)); } - + topKey = buildTopKey(currentTerm); - + if (hasTop()) { if (overallRange != null && !overallRange.contains(topKey)) { topKey = null; } } } - - public void seek(Range range, Collection columnFamilies, boolean inclusive) throws IOException { - + + @Override + public void seek(Range range, Collection columnFamilies, boolean inclusive) + throws IOException { + overallRange = new Range(range); if (log.isDebugEnabled()) { log.debug("seek, overallRange: " + overallRange); } - + if (range.getEndKey() != null && range.getEndKey().getRow() != null) { this.parentEndRow = range.getEndKey().getRow(); } - + if (log.isDebugEnabled()) { log.debug("OI.seek Entry - sources.size = " + sources.size()); - log.debug("OI.seek Entry - currentTerm = " + ((currentTerm == null) ? "false" : currentTerm.iter.getTopKey())); - log.debug("OI.seek Entry - Key from Range = " + ((range == null) ? "false" : range.getStartKey())); + log.debug("OI.seek Entry - currentTerm = " + + ((currentTerm == null) ? 
"false" : currentTerm.iter.getTopKey())); + log.debug( + "OI.seek Entry - Key from Range = " + ((range == null) ? "false" : range.getStartKey())); } - + // If sources.size is 0, there is nothing to process, so just return. if (sources.isEmpty()) { currentTerm = null; topKey = null; return; } - + this.columnFamilies = columnFamilies; this.inclusive = inclusive; - + Range newRange = range; Key sourceKey = null; Key startKey = null; - + if (range != null) { startKey = range.getStartKey(); } - + // Clear the PriorityQueue so that we can re-populate it. sorted.clear(); - + TermSource TS = null; Iterator iter = sources.iterator(); // For each term, seek forward. @@ -380,66 +389,73 @@ public class OrIterator implements SortedKeyValueIterator { int counter = 1; while (iter.hasNext()) { TS = iter.next(); - + TS.atEnd = false; - + if (sources.size() == 1) { currentTerm = TS; } - + if (log.isDebugEnabled()) { log.debug("OI.seek on TS >>" + TS + "<<"); log.debug("OI.seek seeking source >>" + counter + "<< "); } - + counter++; - + newRange = range; sourceKey = null; - + if (startKey != null) { // Construct the new key for the range if (log.isDebugEnabled()) { log.debug("OI.seek startKey >>" + startKey + "<<"); } - + if (startKey.getColumnQualifier() != null) { - sourceKey = new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, new Text(((TS.term == null) ? "" : TS.term + "\0") - + range.getStartKey().getColumnQualifier())); + sourceKey = + new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, + new Text(((TS.term == null) ? "" : TS.term + "\0") + + range.getStartKey().getColumnQualifier())); } else { - sourceKey = new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, (TS.term == null) ? nullText : TS.term); + sourceKey = + new Key(startKey.getRow(), (TS.dataLocation == null) ? nullText : TS.dataLocation, + (TS.term == null) ? nullText : TS.term); } - + if (log.isDebugEnabled()) { log.debug("OI.seek Seeking to the key => " + sourceKey); } - + newRange = new Range(sourceKey, true, sourceKey.followingKey(PartialKey.ROW), false); } else { if (log.isDebugEnabled()) { log.debug("OI.seek Using the range Seek() argument to seek => " + newRange); } } - + TS.iter.seek(newRange, columnFamilies, inclusive); - + TS.setNew(); - + // Make sure we're on a key with the correct dataLocation and term advanceToMatch(TS); - + TS.setNew(); - + if (log.isDebugEnabled()) { log.debug("OI.seek sourceKey >>" + sourceKey + "<< "); log.debug("OI.seek topKey >>" + ((TS.topKey == null) ? "false" : TS.topKey) + "<< "); log.debug("OI.seek TS.fieldTerm == " + TS.fieldTerm); - - log.debug("OI.seek Checks (correct = 0,0,0 / 0,1,1): " + ((TS.topKey != null) ? "0," : "1,") + ((TS.dataLocation != null) ? "0," : "1,") - + (((TS.term != null && TS.fieldTerm != null) && (TS.term.compareTo(TS.fieldTerm) != 0)) ? "0" : "1")); + + log.debug("OI.seek Checks (correct = 0,0,0 / 0,1,1): " + ((TS.topKey != null) ? "0," : "1,") + + ((TS.dataLocation != null) ? "0," : "1,") + + (((TS.term != null && TS.fieldTerm != null) && (TS.term.compareTo(TS.fieldTerm) != 0)) + ? 
"0" : "1")); } - - if ((TS.topKey == null) || ((TS.dataLocation != null) && (TS.term.compareTo(TS.fieldTerm) != 0))) { + + if ((TS.topKey == null) + || ((TS.dataLocation != null) && (TS.term.compareTo(TS.fieldTerm) != 0))) { // log.debug("OI.seek Removing " + TS.term); // iter.remove(); } // Optimization if we only have one element @@ -451,7 +467,7 @@ public class OrIterator implements SortedKeyValueIterator { if (log.isDebugEnabled()) { log.debug("OI.seek new topKey >>" + ((topKey == null) ? "false" : topKey) + "<< "); } - + // make sure it is in the range if we have one. if (hasTop()) { if (overallRange != null && !overallRange.contains(topKey)) { @@ -464,25 +480,25 @@ public class OrIterator implements SortedKeyValueIterator { return; } } - + // And set currentTerm = the next valid key/term. currentTerm = sorted.poll(); - + if (log.isDebugEnabled()) { log.debug("OI.seek currentTerm = " + currentTerm); } - + topKey = buildTopKey(currentTerm); if (topKey == null) { if (log.isDebugEnabled()) { log.debug("OI.seek() topKey is null"); } } - + if (log.isDebugEnabled()) { log.debug("OI.seek new topKey >>" + ((topKey == null) ? "false" : topKey) + "<< "); } - + if (hasTop()) { if (overallRange != null && !overallRange.contains(topKey)) { if (log.isDebugEnabled()) { @@ -491,44 +507,49 @@ public class OrIterator implements SortedKeyValueIterator { topKey = null; } } - + } - + + @Override final public Key getTopKey() { if (log.isDebugEnabled()) { log.debug("OI.getTopKey key >>" + topKey); } - + return topKey; } - + + @Override final public Value getTopValue() { if (log.isDebugEnabled()) { log.debug("OI.getTopValue key >>" + currentTerm.iter.getTopValue()); } - + return currentTerm.iter.getTopValue(); } - + + @Override final public boolean hasTop() { if (log.isDebugEnabled()) { log.debug("OI.hasTop = " + ((topKey == null) ? "false" : "true")); } - + return topKey != null; } - - public void init(SortedKeyValueIterator source, Map options, IteratorEnvironment env) throws IOException { + + @Override + public void init(SortedKeyValueIterator source, Map options, + IteratorEnvironment env) throws IOException { throw new UnsupportedOperationException(); } - + /** - * Ensures that the current TermSource is pointing to a key with the correct dataLocation and term or sets - * topKey to null if there is no such key remaining. - * + * Ensures that the current TermSource is pointing to a key with the correct + * dataLocation and term or sets topKey to null if there is + * no such key remaining. 
+ * * @param TS * The TermSource to advance - * @throws IOException */ private void advanceToMatch(TermSource TS) throws IOException { boolean matched = false; @@ -537,25 +558,26 @@ public class OrIterator implements SortedKeyValueIterator { TS.topKey = null; return; } - + Key iterTopKey = TS.iter.getTopKey(); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch current topKey = " + iterTopKey); } - + // we should compare the row to the end of the range if (overallRange.getEndKey() != null) { - + if (overallRange != null && !overallRange.contains(TS.iter.getTopKey())) { if (log.isDebugEnabled()) { - log.debug("overallRange: " + overallRange + " does not contain TS.iter.topKey: " + TS.iter.getTopKey()); + log.debug("overallRange: " + overallRange + " does not contain TS.iter.topKey: " + + TS.iter.getTopKey()); log.debug("OI.advanceToMatch at the end, returning"); } - + TS.atEnd = true; TS.topKey = null; - + return; } else { if (log.isDebugEnabled()) { @@ -567,116 +589,118 @@ public class OrIterator implements SortedKeyValueIterator { log.debug("OI.advanceToMatch overallRange.getEndKey() == null"); } } - + // Advance to the correct dataLocation if (log.isDebugEnabled()) { log.debug("Comparing dataLocations."); - log.debug("OI.advanceToMatch dataLocationCompare: " + getDataLocation(iterTopKey) + " == " + TS.dataLocation); + log.debug("OI.advanceToMatch dataLocationCompare: " + getDataLocation(iterTopKey) + " == " + + TS.dataLocation); } - + int dataLocationCompare = getDataLocation(iterTopKey).compareTo(TS.dataLocation); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch dataLocationCompare = " + dataLocationCompare); } - + // Make sure we're at a row for this dataLocation if (dataLocationCompare < 0) { if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seek to desired dataLocation"); } - + Key seekKey = new Key(iterTopKey.getRow(), TS.dataLocation, nullText); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seeking to => " + seekKey); } - + TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive); - + continue; } else if (dataLocationCompare > 0) { if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch advanced beyond desired dataLocation, seek to next row"); } - + // Gone past the current dataLocation, seek to the next row Key seekKey = iterTopKey.followingKey(PartialKey.ROW); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seeking to => " + seekKey); } - + TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive); - + continue; } - + // Advance to the correct term if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch termCompare: " + getTerm(iterTopKey) + " == " + TS.term); } - + int termCompare = getTerm(iterTopKey).compareTo(TS.term); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch termCompare = " + termCompare); } - + // Make sure we're at a row for this term if (termCompare < 0) { if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seek to desired term"); } - + Key seekKey = new Key(iterTopKey.getRow(), iterTopKey.getColumnFamily(), TS.term); - + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seeking to => " + seekKey); } - + TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive); - + continue; } else if (termCompare > 0) { if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch advanced beyond desired term, seek to next row"); } - + // Gone past the current term, seek to the next row Key seekKey = iterTopKey.followingKey(PartialKey.ROW); 
- + if (log.isDebugEnabled()) { log.debug("OI.advanceToMatch seeking to => " + seekKey); } - + TS.iter.seek(new Range(seekKey, true, null, false), columnFamilies, inclusive); continue; } - + // If we made it here, we found a match matched = true; } } - + public boolean jump(Key jumpKey) throws IOException { if (log.isDebugEnabled()) { log.debug("OR jump: " + jumpKey); printTopKeysForTermSources(); } - + // is the jumpKey outside my overall range? if (parentEndRow != null && parentEndRow.compareTo(jumpKey.getRow()) < 0) { // can't go there. if (log.isDebugEnabled()) { - log.debug("jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); + log.debug( + "jumpRow: " + jumpKey.getRow() + " is greater than my parentEndRow: " + parentEndRow); } return false; } - + // Clear the PriorityQueue so that we can re-populate it. sorted.clear(); - + // check each term source and jump it if necessary. for (TermSource ts : sources) { int comp; @@ -684,7 +708,8 @@ public class OrIterator implements SortedKeyValueIterator { if (log.isDebugEnabled()) { log.debug("jump called, but ts.topKey is null, this one needs to move to next row."); } - Key startKey = new Key(jumpKey.getRow(), ts.dataLocation, new Text(ts.term + "\0" + jumpKey.getColumnFamily())); + Key startKey = new Key(jumpKey.getRow(), ts.dataLocation, + new Text(ts.term + "\0" + jumpKey.getColumnFamily())); Key endKey = null; if (parentEndRow != null) { endKey = new Key(parentEndRow); @@ -694,14 +719,15 @@ public class OrIterator implements SortedKeyValueIterator { ts.setNew(); advanceToMatch(ts); ts.setNew(); - + } else { // check row, then uid comp = this.topKey.getRow().compareTo(jumpKey.getRow()); if (comp > 0) { if (log.isDebugEnabled()) { log.debug("jump, our row is ahead of jumpKey."); - log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + " parentEndRow" + parentEndRow); + log.debug("jumpRow: " + jumpKey.getRow() + " myRow: " + topKey.getRow() + + " parentEndRow" + parentEndRow); } if (ts.hasTop()) { sorted.add(ts); @@ -725,21 +751,21 @@ public class OrIterator implements SortedKeyValueIterator { // need to check uid String myUid = getUID(ts.topKey); String jumpUid = getUID(jumpKey); - + if (log.isDebugEnabled()) { if (myUid == null) { log.debug("myUid is null"); } else { log.debug("myUid: " + myUid); } - + if (jumpUid == null) { log.debug("jumpUid is null"); } else { log.debug("jumpUid: " + jumpUid); } } - + int ucomp = myUid.compareTo(jumpUid); if (ucomp < 0) { // need to move forward @@ -761,7 +787,7 @@ public class OrIterator implements SortedKeyValueIterator { ts.setNew(); advanceToMatch(ts); ts.setNew(); - + if (log.isDebugEnabled()) { if (ts.iter.hasTop()) { log.debug("ts.iter.topkey: " + ts.iter.getTopKey()); @@ -769,10 +795,10 @@ public class OrIterator implements SortedKeyValueIterator { log.debug("ts.iter.topKey is null"); } } - }// else do nothing, we're ahead of jump key + } // else do nothing, we're ahead of jump key } } - + // ts should have moved, validate this particular ts. if (ts.hasTop()) { if (overallRange != null) { @@ -790,14 +816,14 @@ public class OrIterator implements SortedKeyValueIterator { if (log.isDebugEnabled()) { log.debug("OI.jump currentTerm = " + currentTerm); } - + topKey = buildTopKey(currentTerm); if (log.isDebugEnabled()) { log.debug("OI.jump new topKey >>" + ((topKey == null) ? 
"false" : topKey) + "<< "); } return hasTop(); } - + private void printTopKeysForTermSources() { if (log.isDebugEnabled()) { for (TermSource ts : sources) { @@ -811,7 +837,7 @@ public class OrIterator implements SortedKeyValueIterator { log.debug("ts is null"); } } - + if (topKey != null) { log.debug("OrIterator current topKey: " + topKey); } else { diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java index 5c7c20c..75135be 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/logic/AbstractQueryLogic.java @@ -69,112 +69,115 @@ import com.google.common.collect.Multimap; /** *
  * 

 * Overview

- * Query implementation that works with the JEXL grammar. This + * Query implementation that works with the JEXL grammar. This * uses the metadata, global index, and partitioned table to return * results based on the query. Example queries: - * + * * Single Term Query * 'foo' - looks in global index for foo, and if any entries are found, then the query * is rewritten to be field1 == 'foo' or field2 == 'foo', etc. This is then passed * down the optimized query path which uses the intersecting iterators on the partitioned * table. - * - * Boolean expression + * + * Boolean expression * field == 'foo' - For fielded queries, those that contain a field, an operator, and a literal (string or number), * the query is parsed and the set of eventFields in the query that are indexed is determined by * querying the metadata table. Depending on the conjunctions in the query (or, and, not) and the * eventFields that are indexed, the query may be sent down the optimized path or the full scan path. - * + * * We are not supporting all of the operators that JEXL supports at this time. We are supporting the following operators: - * + * * ==, !=, >, ≥, <, ≤, =~, and !~ - * + * * Custom functions can be created and registered with the Jexl engine. The functions can be used in the queries in conjunction * with other supported operators. A sample function has been created, called between, and is bound to the 'f' namespace. An * example using this function is : "f:between(LATITUDE,60.0, 70.0)" - * + * *

 * Constraints on Query Structure

* Queries that are sent to this class need to be formatted such that there is a space on either side of the operator. We are - * rewriting the query in some cases and the current implementation is expecting a space on either side of the operator. If + * rewriting the query in some cases and the current implementation is expecting a space on either side of the operator. If * an error occurs in the evaluation we are skipping the event. - * + * *

 * Notes on Optimization

* Queries that meet any of the following criteria will perform a full scan of the events in the partitioned table: - * + * * 1. An 'or' conjunction exists in the query but not all of the terms are indexed. * 2. No indexed terms exist in the query * 3. An unsupported operator exists in the query - * + * *
- * + * */ public abstract class AbstractQueryLogic { - + protected static Logger log = Logger.getLogger(AbstractQueryLogic.class); - + /** * Set of datatypes to limit the query to. */ public static final String DATATYPE_FILTER_SET = "datatype.filter.set"; - + private static class DoNotPerformOptimizedQueryException extends Exception { private static final long serialVersionUID = 1L; } - + /** - * Object that is used to hold ranges found in the index. Subclasses may compute the final range set in various ways. + * Object that is used to hold ranges found in the index. Subclasses may compute the final range + * set in various ways. */ public static abstract class IndexRanges { - + private Map indexValuesToOriginalValues = null; private Multimap fieldNamesAndValues = HashMultimap.create(); - private Map termCardinality = new HashMap(); - protected Map> ranges = new HashMap>(); - + private Map termCardinality = new HashMap<>(); + protected Map> ranges = new HashMap<>(); + public Multimap getFieldNamesAndValues() { return fieldNamesAndValues; } - + public void setFieldNamesAndValues(Multimap fieldNamesAndValues) { this.fieldNamesAndValues = fieldNamesAndValues; } - + public final Map getTermCardinality() { return termCardinality; } - + public Map getIndexValuesToOriginalValues() { return indexValuesToOriginalValues; } - + public void setIndexValuesToOriginalValues(Map indexValuesToOriginalValues) { this.indexValuesToOriginalValues = indexValuesToOriginalValues; } - + public abstract void add(String term, Range r); - + public abstract Set getRanges(); } - + /** - * Object that computes the ranges by unioning all of the ranges for all of the terms together. In the case where ranges overlap, the largest range is used. + * Object that computes the ranges by unioning all of the ranges for all of the terms together. In + * the case where ranges overlap, the largest range is used. */ public static class UnionIndexRanges extends IndexRanges { - + public static String DEFAULT_KEY = "default"; - + public UnionIndexRanges() { this.ranges.put(DEFAULT_KEY, new TreeSet()); } - + + @Override public Set getRanges() { // So the set of ranges is ordered. It *should* be the case that // ranges with partition ids will sort before ranges that point to // a specific event. Populate a new set of ranges but don't add a // range for an event where that range is contained in a range already // added. - Set shardsAdded = new HashSet(); - Set returnSet = new HashSet(); + Set shardsAdded = new HashSet<>(); + Set returnSet = new HashSet<>(); for (Range r : ranges.get(DEFAULT_KEY)) { if (!shardsAdded.contains(r.getStartKey().getRow())) { // Only add ranges with a start key for the entire partition. 
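A minimal standalone sketch of the de-duplication getRanges() performs across this hunk and the next, assuming the ranges are sorted so that partition-wide ranges come before event-specific ones; the coversWholePartition() helper is a hypothetical stand-in for the "start key for the entire partition" test:

    import java.util.HashSet;
    import java.util.Set;
    import java.util.TreeSet;

    import org.apache.accumulo.core.data.Range;
    import org.apache.hadoop.io.Text;

    public class RangeUnionSketch {
      // Hypothetical stand-in: a start key naming only the partition row is
      // taken to cover every event in that partition.
      static boolean coversWholePartition(Range r) {
        return r.getStartKey().getColumnFamily().getLength() == 0;
      }

      static Set<Range> union(TreeSet<Range> sorted) {
        Set<Text> partitionsAdded = new HashSet<>();
        Set<Range> out = new HashSet<>();
        for (Range r : sorted) {
          Text row = r.getStartKey().getRow();
          if (coversWholePartition(r)) {
            partitionsAdded.add(row); // whole partition now covered
            out.add(r);
          } else if (!partitionsAdded.contains(row)) {
            out.add(r); // event range not shadowed by a partition-wide range
          }
        }
        return out;
      }
    }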
@@ -184,18 +187,20 @@ public abstract class AbstractQueryLogic { returnSet.add(r); } else { // if (log.isTraceEnabled()) - log.info("Skipping event specific range: " + r.toString() + " because range has already been added: " + log.info("Skipping event specific range: " + r.toString() + + " because range has already been added: " + shardsAdded.contains(r.getStartKey().getRow())); } } return returnSet; } - + + @Override public void add(String term, Range r) { ranges.get(DEFAULT_KEY).add(r); } } - + private String metadataTableName; private String indexTableName; private String reverseIndexTableName; @@ -207,35 +212,31 @@ public abstract class AbstractQueryLogic { private Kryo kryo = new Kryo(); private EventFields eventFields = new EventFields(); private List unevaluatedFields = null; - private Map,Normalizer> normalizerCacheMap = new HashMap,Normalizer>(); + private Map,Normalizer> normalizerCacheMap = new HashMap<>(); private static final String NULL_BYTE = "\u0000"; - + public AbstractQueryLogic() { super(); EventFields.initializeKryo(kryo); } - + /** * Queries metadata table to determine which terms are indexed. - * - * @param c - * @param auths - * @param queryLiterals + * * @param datatypes * - optional list of types * @return map of indexed field names to types to normalizers used in this date range - * @throws TableNotFoundException - * @throws IllegalAccessException - * @throws InstantiationException */ - protected Map>> findIndexedTerms(Connector c, Authorizations auths, Set queryLiterals, - Set datatypes) throws TableNotFoundException, InstantiationException, IllegalAccessException { - - Map>> results = new HashMap>>(); - + protected Map>> findIndexedTerms(Connector c, + Authorizations auths, Set queryLiterals, Set datatypes) + throws TableNotFoundException, InstantiationException, IllegalAccessException { + + Map>> results = new HashMap<>(); + for (String literal : queryLiterals) { - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("Querying " + this.getMetadataTableName() + " table for " + literal); + } Range range = new Range(literal.toUpperCase()); Scanner scanner = c.createScanner(this.getMetadataTableName(), auths); scanner.setRange(range); @@ -252,13 +253,16 @@ public abstract class AbstractQueryLogic { if (idx != -1) { String type = colq.substring(0, idx); // If types are specified and this type is not in the list then skip it. - if (null != datatypes && !datatypes.contains(type)) + if (null != datatypes && !datatypes.contains(type)) { continue; + } try { @SuppressWarnings("unchecked") - Class clazz = (Class) Class.forName(colq.substring(idx + 1)); - if (!normalizerCacheMap.containsKey(clazz)) + Class clazz = + (Class) Class.forName(colq.substring(idx + 1)); + if (!normalizerCacheMap.containsKey(clazz)) { normalizerCacheMap.put(clazz, clazz.newInstance()); + } results.get(literal).put(type, clazz); } catch (ClassNotFoundException e) { log.error("Unable to find normalizer on class path: " + colq.substring(idx + 1), e); @@ -272,26 +276,25 @@ public abstract class AbstractQueryLogic { } } } - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("METADATA RESULTS: " + results.toString()); + } return results; } - + /** * Performs a lookup in the global index for a single non-fielded term. - * - * @param c - * @param auths - * @param value + * * @param datatypes * - optional list of types * @return ranges that fit into the date range. 
*/ - protected abstract IndexRanges getTermIndexInformation(Connector c, Authorizations auths, String value, Set datatypes) throws TableNotFoundException; - + protected abstract IndexRanges getTermIndexInformation(Connector c, Authorizations auths, + String value, Set datatypes) throws TableNotFoundException; + /** * Performs a lookup in the global index / reverse index and returns a RangeCalculator - * + * * @param c * Accumulo connection * @param auths @@ -300,107 +303,107 @@ public abstract class AbstractQueryLogic { * multimap of indexed field name and Normalizers used * @param terms * multimap of field name and QueryTerm object - * @param indexTableName - * @param reverseIndexTableName * @param queryString * original query string - * @param queryThreads * @param datatypes * - optional list of types * @return range calculator - * @throws TableNotFoundException */ - protected abstract RangeCalculator getTermIndexInformation(Connector c, Authorizations auths, Multimap indexedTerms, - Multimap terms, String indexTableName, String reverseIndexTableName, String queryString, int queryThreads, Set datatypes) + protected abstract RangeCalculator getTermIndexInformation(Connector c, Authorizations auths, + Multimap indexedTerms, Multimap terms, + String indexTableName, String reverseIndexTableName, String queryString, int queryThreads, + Set datatypes) throws TableNotFoundException, org.apache.commons.jexl2.parser.ParseException; - - protected abstract Collection getFullScanRange(Date begin, Date end, Multimap terms); - + + protected abstract Collection getFullScanRange(Date begin, Date end, + Multimap terms); + public String getMetadataTableName() { return metadataTableName; } - + public String getIndexTableName() { return indexTableName; } - + public String getTableName() { return tableName; } - + public void setMetadataTableName(String metadataTableName) { this.metadataTableName = metadataTableName; } - + public void setIndexTableName(String indexTableName) { this.indexTableName = indexTableName; } - + public void setTableName(String tableName) { this.tableName = tableName; } - + public int getQueryThreads() { return queryThreads; } - + public void setQueryThreads(int queryThreads) { this.queryThreads = queryThreads; } - + public String getReadAheadQueueSize() { return readAheadQueueSize; } - + public String getReadAheadTimeOut() { return readAheadTimeOut; } - + public boolean isUseReadAheadIterator() { return useReadAheadIterator; } - + public void setReadAheadQueueSize(String readAheadQueueSize) { this.readAheadQueueSize = readAheadQueueSize; } - + public void setReadAheadTimeOut(String readAheadTimeOut) { this.readAheadTimeOut = readAheadTimeOut; } - + public void setUseReadAheadIterator(boolean useReadAheadIterator) { this.useReadAheadIterator = useReadAheadIterator; } - + public String getReverseIndexTableName() { return reverseIndexTableName; } - + public void setReverseIndexTableName(String reverseIndexTableName) { this.reverseIndexTableName = reverseIndexTableName; } - + public List getUnevaluatedFields() { return unevaluatedFields; } - + public void setUnevaluatedFields(List unevaluatedFields) { this.unevaluatedFields = unevaluatedFields; } - + public void setUnevaluatedFields(String unevaluatedFieldList) { - this.unevaluatedFields = new ArrayList(); - for (String field : unevaluatedFieldList.split(",")) + this.unevaluatedFields = new ArrayList<>(); + for (String field : unevaluatedFieldList.split(",")) { this.unevaluatedFields.add(field); + } } - + public Document 
createDocument(Key key, Value value) { Document doc = new Document(); eventFields.clear(); ByteBuffer buf = ByteBuffer.wrap(value.get()); eventFields.readObjectData(kryo, buf); - + // Set the id to the document id which is located in the colf String row = key.getRow().toString(); String colf = key.getColumnFamily().toString(); @@ -416,48 +419,50 @@ public abstract class AbstractQueryLogic { doc.getFields().add(val); } } - + // Add the pointer for the content. Field docPointer = new Field(); docPointer.setFieldName("DOCUMENT"); docPointer.setFieldValue("DOCUMENT:" + row + "/" + type + "/" + id); doc.getFields().add(docPointer); - + return doc; } - + public String getResultsKey(Entry key) { // Use the colf from the table, it contains the uuid and datatype return key.getKey().getColumnFamily().toString(); } - - public Results runQuery(Connector connector, List authorizations, String query, Date beginDate, Date endDate, Set types) { - + + public Results runQuery(Connector connector, List authorizations, String query, + Date beginDate, Date endDate, Set types) { + if (StringUtils.isEmpty(query)) { - throw new IllegalArgumentException("NULL QueryNode reference passed to " + this.getClass().getSimpleName()); + throw new IllegalArgumentException( + "NULL QueryNode reference passed to " + this.getClass().getSimpleName()); } - - Set ranges = new HashSet(); + + Set ranges = new HashSet<>(); Set typeFilter = types; String array[] = authorizations.toArray(new String[0]); Authorizations auths = new Authorizations(array); Results results = new Results(); - + // Get the query string String queryString = query; - + StopWatch abstractQueryLogic = new StopWatch(); StopWatch optimizedQuery = new StopWatch(); StopWatch queryGlobalIndex = new StopWatch(); StopWatch optimizedEventQuery = new StopWatch(); StopWatch fullScanQuery = new StopWatch(); StopWatch processResults = new StopWatch(); - + abstractQueryLogic.start(); - + StopWatch parseQuery = new StopWatch(); parseQuery.start(); - + QueryParser parser; try { if (log.isDebugEnabled()) { @@ -473,8 +478,8 @@ public abstract class AbstractQueryLogic { if (log.isDebugEnabled()) { log.debug(hash + " Query: " + queryString); } - - Set fields = new HashSet(); + + Set fields = new HashSet<>(); for (String f : parser.getQueryIdentifiers()) { fields.add(f); } @@ -484,14 +489,14 @@ public abstract class AbstractQueryLogic { // Remove any negated fields from the fields list, we don't want to lookup negated fields // in the index. fields.removeAll(parser.getNegatedTermsForOptimizer()); - + if (log.isDebugEnabled()) { log.debug("getQueryIdentifiers: " + parser.getQueryIdentifiers().toString()); } // Get the mapping of field name to QueryTerm object from the query. The query term object // contains the operator, whether its negated or not, and the literal to test against. Multimap terms = parser.getQueryTerms(); - + // Find out which terms are indexed // TODO: Should we cache indexed terms or does that not make sense since we are always // loading data. 
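A minimal sketch of the metadata lookup that findIndexedTerms() performs, assuming the layout implied by the hunk above (row = uppercased term; column qualifier = datatype, then the NULL_BYTE separator, then the normalizer class name); the table name and term are placeholder values:

    import java.util.Map.Entry;

    import org.apache.accumulo.core.client.Connector;
    import org.apache.accumulo.core.client.Scanner;
    import org.apache.accumulo.core.client.TableNotFoundException;
    import org.apache.accumulo.core.data.Key;
    import org.apache.accumulo.core.data.Range;
    import org.apache.accumulo.core.data.Value;
    import org.apache.accumulo.core.security.Authorizations;

    public class MetadataLookupSketch {
      public static void printIndexedTypes(Connector c, Authorizations auths, String term)
          throws TableNotFoundException {
        // One metadata row per uppercased term; "wikiMetadata" is a placeholder.
        Scanner scanner = c.createScanner("wikiMetadata", auths);
        scanner.setRange(new Range(term.toUpperCase()));
        for (Entry<Key,Value> entry : scanner) {
          String colq = entry.getKey().getColumnQualifier().toString();
          int idx = colq.indexOf('\0'); // NULL_BYTE separator assumed
          if (idx != -1) {
            System.out.println("datatype=" + colq.substring(0, idx)
                + ", normalizer=" + colq.substring(idx + 1));
          }
        }
      }
    }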
@@ -503,10 +508,11 @@ public abstract class AbstractQueryLogic { } catch (Exception e1) { throw new RuntimeException("Error in metadata lookup", e1); } - + // Create a map of indexed term to set of normalizers for it Multimap indexedTerms = HashMultimap.create(); - for (Entry>> entry : metadataResults.entrySet()) { + for (Entry>> entry : metadataResults + .entrySet()) { // Get the normalizer from the normalizer cache for (Class clazz : entry.getValue().values()) { indexedTerms.put(entry.getKey(), normalizerCacheMap.get(clazz)); @@ -516,35 +522,41 @@ public abstract class AbstractQueryLogic { if (log.isDebugEnabled()) { log.debug(hash + " Indexed Terms: " + indexedTerms.toString()); } - + Set orTerms = parser.getOrTermsForOptimizer(); - + // Iterate over the query terms to get the operators specified in the query. - ArrayList unevaluatedExpressions = new ArrayList(); + ArrayList unevaluatedExpressions = new ArrayList<>(); boolean unsupportedOperatorSpecified = false; for (Entry entry : terms.entries()) { if (null == entry.getValue()) { continue; } - - if (null != this.unevaluatedFields && this.unevaluatedFields.contains(entry.getKey().trim())) { - unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator() + " " + entry.getValue().getValue()); + + if (null != this.unevaluatedFields + && this.unevaluatedFields.contains(entry.getKey().trim())) { + unevaluatedExpressions.add(entry.getKey().trim() + " " + entry.getValue().getOperator() + + " " + entry.getValue().getValue()); } - + int operator = JexlOperatorConstants.getJJTNodeType(entry.getValue().getOperator()); - if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE || operator == ParserTreeConstants.JJTLENODE - || operator == ParserTreeConstants.JJTLTNODE || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE || operator == ParserTreeConstants.JJTERNODE)) { + if (!(operator == ParserTreeConstants.JJTEQNODE || operator == ParserTreeConstants.JJTNENODE + || operator == ParserTreeConstants.JJTLENODE || operator == ParserTreeConstants.JJTLTNODE + || operator == ParserTreeConstants.JJTGENODE || operator == ParserTreeConstants.JJTGTNODE + || operator == ParserTreeConstants.JJTERNODE)) { unsupportedOperatorSpecified = true; break; } } - if (null != unevaluatedExpressions) + if (null != unevaluatedExpressions) { unevaluatedExpressions.trimToSize(); + } if (log.isDebugEnabled()) { - log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: " + indexedTerms.toString() + " orTerms: " - + orTerms.toString() + " unevaluatedExpressions: " + unevaluatedExpressions.toString()); + log.debug(hash + " unsupportedOperators: " + unsupportedOperatorSpecified + " indexedTerms: " + + indexedTerms.toString() + " orTerms: " + orTerms.toString() + + " unevaluatedExpressions: " + unevaluatedExpressions.toString()); } - + // We can use the intersecting iterator over the field index as an optimization under the // following conditions // @@ -554,8 +566,10 @@ public abstract class AbstractQueryLogic { // 1. No unsupported operators in the query. // 2. and all terms indexed // or - // 1. All or'd terms are indexed. NOTE, this will potentially skip some queries and push to a full table scan - // // WE should look into finding a better way to handle whether we do an optimized query or not. + // 1. All or'd terms are indexed. 
NOTE, this will potentially skip some queries and push to a + // full table scan + // // WE should look into finding a better way to handle whether we do an optimized query or + // not. boolean optimizationSucceeded = false; boolean orsAllIndexed = false; if (orTerms.isEmpty()) { @@ -563,20 +577,24 @@ public abstract class AbstractQueryLogic { } else { orsAllIndexed = indexedTerms.keySet().containsAll(orTerms); } - + if (log.isDebugEnabled()) { log.debug("All or terms are indexed"); } - + if (!unsupportedOperatorSpecified - && (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0) || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) { + && (((null == orTerms || orTerms.isEmpty()) && indexedTerms.size() > 0) + || (fields.size() > 0 && indexedTerms.size() == fields.size()) || orsAllIndexed)) { optimizedQuery.start(); // Set up intersecting iterator over field index. - - // Get information from the global index for the indexed terms. The results object will contain the term - // mapped to an object that contains the total count, and partitions where this term is located. - - // TODO: Should we cache indexed term information or does that not make sense since we are always loading data + + // Get information from the global index for the indexed terms. The results object will + // contain the term + // mapped to an object that contains the total count, and partitions where this term is + // located. + + // TODO: Should we cache indexed term information or does that not make sense since we are + // always loading data queryGlobalIndex.start(); IndexRanges termIndexInfo; try { @@ -585,7 +603,8 @@ public abstract class AbstractQueryLogic { if (fields.isEmpty()) { termIndexInfo = this.getTermIndexInformation(connector, auths, queryString, typeFilter); if (null != termIndexInfo && termIndexInfo.getRanges().isEmpty()) { - // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards + // Then we didn't find anything in the index for this query. This may happen for an + // indexed term that has wildcards // in unhandled locations. // Break out of here by throwing a named exception and do full scan throw new DoNotPerformOptimizedQueryException(); @@ -605,17 +624,20 @@ public abstract class AbstractQueryLogic { sep = " or "; } if (log.isDebugEnabled()) { - log.debug("Rewrote query for non-fielded single term query: " + queryString + " to " + buf.toString()); + log.debug("Rewrote query for non-fielded single term query: " + queryString + " to " + + buf.toString()); } queryString = buf.toString(); } else { throw new RuntimeException("Unexpected IndexRanges implementation"); } } else { - RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms, this.getIndexTableName(), this.getReverseIndexTableName(), - queryString, this.queryThreads, typeFilter); + RangeCalculator calc = this.getTermIndexInformation(connector, auths, indexedTerms, terms, + this.getIndexTableName(), this.getReverseIndexTableName(), queryString, + this.queryThreads, typeFilter); if (null == calc.getResult() || calc.getResult().isEmpty()) { - // Then we didn't find anything in the index for this query. This may happen for an indexed term that has wildcards + // Then we didn't find anything in the index for this query. This may happen for an + // indexed term that has wildcards // in unhandled locations. 
// Break out of here by throwing a named exception and do full scan throw new DoNotPerformOptimizedQueryException(); @@ -639,12 +661,13 @@ public abstract class AbstractQueryLogic { termIndexInfo = null; } queryGlobalIndex.stop(); - + // Determine if we should proceed with optimized query based on results from the global index boolean proceed = false; if (null == termIndexInfo || termIndexInfo.getFieldNamesAndValues().values().size() == 0) { proceed = false; - } else if (null != orTerms && orTerms.size() > 0 && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) { + } else if (null != orTerms && orTerms.size() > 0 + && (termIndexInfo.getFieldNamesAndValues().values().size() == indexedTerms.size())) { proceed = true; } else if (termIndexInfo.getFieldNamesAndValues().values().size() > 0) { proceed = true; @@ -655,12 +678,14 @@ public abstract class AbstractQueryLogic { } if (log.isDebugEnabled()) { log.debug("Proceed with optimized query: " + proceed); - if (null != termIndexInfo) - log.debug("termIndexInfo.getTermsFound().size(): " + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: " + if (null != termIndexInfo) { + log.debug("termIndexInfo.getTermsFound().size(): " + + termIndexInfo.getFieldNamesAndValues().values().size() + " indexedTerms.size: " + indexedTerms.size() + " fields.size: " + fields.size()); + } } if (proceed) { - + if (log.isDebugEnabled()) { log.debug(hash + " Performing optimized query"); } @@ -669,7 +694,7 @@ public abstract class AbstractQueryLogic { if (log.isDebugEnabled()) { log.info(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString()); } - + // Create BatchScanner, set the ranges, and setup the iterators. optimizedEventQuery.start(); BatchScanner bs = null; @@ -677,13 +702,15 @@ public abstract class AbstractQueryLogic { bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads); bs.setRanges(ranges); IteratorSetting si = new IteratorSetting(21, "eval", OptimizedQueryIterator.class); - + if (log.isDebugEnabled()) { - log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString); + log.debug( + "Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString); } // Set the query option si.addOption(EvaluatingIterator.QUERY_OPTION, queryString); - // Set the Indexed Terms List option. This is the field name and normalized field value pair separated + // Set the Indexed Terms List option. This is the field name and normalized field value + // pair separated // by a comma. 
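// Hypothetical example of the option value built below:
// "TITLE:abacus,TITLE:roman_empire" -- one fieldName:normalizedValue pair per
// entry, comma-separated.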
StringBuilder buf = new StringBuilder(); String sep = ""; @@ -699,14 +726,16 @@ public abstract class AbstractQueryLogic { } } if (log.isDebugEnabled()) { - log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + buf.toString()); + log.debug("Setting scan option: " + FieldIndexQueryReWriter.INDEXED_TERMS_LIST + " to " + + buf.toString()); } FieldIndexQueryReWriter rewriter = new FieldIndexQueryReWriter(); String q = ""; try { q = queryString; - q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for fieldname/fieldvalue - Map opts = new HashMap(); + q = rewriter.applyCaseSensitivity(q, true, false);// Set upper/lower case for + // fieldname/fieldvalue + Map opts = new HashMap<>(); opts.put(FieldIndexQueryReWriter.INDEXED_TERMS_LIST, buf.toString()); q = rewriter.removeNonIndexedTermsAndInvalidRanges(q, opts); q = rewriter.applyNormalizedTerms(q, opts); @@ -719,7 +748,7 @@ public abstract class AbstractQueryLogic { log.error("Problem rewriting query, Exception: " + ex.getMessage()); } si.addOption(BooleanLogicIterator.FIELD_INDEX_QUERY, q); - + // Set the term cardinality option sep = ""; buf.delete(0, buf.length()); @@ -730,18 +759,21 @@ public abstract class AbstractQueryLogic { buf.append(entry.getValue()); sep = ","; } - if (log.isDebugEnabled()) - log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + buf.toString()); + if (log.isDebugEnabled()) { + log.debug("Setting scan option: " + BooleanLogicIterator.TERM_CARDINALITIES + " to " + + buf.toString()); + } si.addOption(BooleanLogicIterator.TERM_CARDINALITIES, buf.toString()); if (this.useReadAheadIterator) { if (log.isDebugEnabled()) { - log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + " and timeout: " + this.readAheadTimeOut); + log.debug("Enabling read ahead iterator with queue size: " + this.readAheadQueueSize + + " and timeout: " + this.readAheadTimeOut); } si.addOption(ReadAheadIterator.QUEUE_SIZE, this.readAheadQueueSize); si.addOption(ReadAheadIterator.TIMEOUT, this.readAheadTimeOut); - + } - + if (null != unevaluatedExpressions) { StringBuilder unevaluatedExpressionList = new StringBuilder(); String sep2 = ""; @@ -749,13 +781,16 @@ public abstract class AbstractQueryLogic { unevaluatedExpressionList.append(sep2).append(exp); sep2 = ","; } - if (log.isDebugEnabled()) - log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString()); - si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString()); + if (log.isDebugEnabled()) { + log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + + unevaluatedExpressionList.toString()); + } + si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, + unevaluatedExpressionList.toString()); } - + bs.addScanIterator(si); - + processResults.start(); processResults.suspend(); long count = 0; @@ -784,16 +819,19 @@ public abstract class AbstractQueryLogic { } optimizedQuery.stop(); } - + // WE should look into finding a better way to handle whether we do an optimized query or not. - // We are not setting up an else condition here because we may have aborted the logic early in the if statement. 
- if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) { - // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && (indexedTerms.size() != fields.size()))) { + // We are not setting up an else condition here because we may have aborted the logic early in + // the if statement. + if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) + && (indexedTerms.size() != fields.size()) && !orsAllIndexed)) { + // if (!optimizationSucceeded || ((null != orTerms && orTerms.size() > 0) && + // (indexedTerms.size() != fields.size()))) { fullScanQuery.start(); if (log.isDebugEnabled()) { log.debug(hash + " Performing full scan query"); } - + // Set up a full scan using the date ranges from the query // Create BatchScanner, set the ranges, and setup the iterators. BatchScanner bs = null; @@ -801,11 +839,11 @@ public abstract class AbstractQueryLogic { // The ranges are the start and end dates Collection r = getFullScanRange(beginDate, endDate, terms); ranges.addAll(r); - + if (log.isDebugEnabled()) { log.debug(hash + " Ranges: count: " + ranges.size() + ", " + ranges.toString()); } - + bs = connector.createBatchScanner(this.getTableName(), auths, queryThreads); bs.setRanges(ranges); IteratorSetting si = new IteratorSetting(22, "eval", EvaluatingIterator.class); @@ -817,14 +855,16 @@ public abstract class AbstractQueryLogic { buf.append(s).append(type).append(".*"); s = "|"; } - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("Setting colf regex iterator to: " + buf.toString()); + } IteratorSetting ri = new IteratorSetting(21, "typeFilter", RegExFilter.class); RegExFilter.setRegexs(ri, null, buf.toString(), null, null, false); bs.addScanIterator(ri); } if (log.isDebugEnabled()) { - log.debug("Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString); + log.debug( + "Setting scan option: " + EvaluatingIterator.QUERY_OPTION + " to " + queryString); } si.addOption(EvaluatingIterator.QUERY_OPTION, queryString); if (null != unevaluatedExpressions) { @@ -834,9 +874,12 @@ public abstract class AbstractQueryLogic { unevaluatedExpressionList.append(sep2).append(exp); sep2 = ","; } - if (log.isDebugEnabled()) - log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + unevaluatedExpressionList.toString()); - si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, unevaluatedExpressionList.toString()); + if (log.isDebugEnabled()) { + log.debug("Setting scan option: " + EvaluatingIterator.UNEVALUTED_EXPRESSIONS + " to " + + unevaluatedExpressionList.toString()); + } + si.addOption(EvaluatingIterator.UNEVALUTED_EXPRESSIONS, + unevaluatedExpressionList.toString()); } bs.addScanIterator(si); long count = 0; @@ -863,7 +906,7 @@ public abstract class AbstractQueryLogic { } fullScanQuery.stop(); } - + log.info("AbstractQueryLogic: " + queryString + " " + timeString(abstractQueryLogic.getTime())); log.info(" 1) parse query " + timeString(parseQuery.getTime())); log.info(" 2) query metadata " + timeString(queryMetadata.getTime())); @@ -872,12 +915,12 @@ public abstract class AbstractQueryLogic { log.info(" 1) process results " + timeString(processResults.getTime())); log.info(" 1) query global index " + timeString(queryGlobalIndex.getTime())); log.info(hash + " Query completed."); - + return results; } - + private static String timeString(long millis) { return String.format("%4.2f", millis / 1000.); } - + } diff --git 
a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java index acfb4f4..7e4fd90 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/FieldIndexQueryReWriter.java @@ -49,52 +49,52 @@ import org.apache.hadoop.io.Text; import org.apache.log4j.Level; import org.apache.log4j.Logger; - import com.google.common.collect.HashMultimap; import com.google.common.collect.Multimap; /** - * The server-side field index queries can only support operations on indexed fields. Additionally, queries that have differing ranges (i.e. one range at the - * fieldname level and another at the fieldValue level) are not currently supported. This class removes these conflicts from the query as well as sets proper - * capitalization configurations etc. - * - * Once the query has been modified, you can pass it to the BooleanLogicIterator on the server-side via the options map. - * + * The server-side field index queries can only support operations on indexed fields. Additionally, + * queries that have differing ranges (i.e. one range at the fieldname level and another at the + * fieldValue level) are not currently supported. This class removes these conflicts from the query + * as well as sets proper capitalization configurations etc. + * + * Once the query has been modified, you can pass it to the BooleanLogicIterator on the server-side + * via the options map. + * */ public class FieldIndexQueryReWriter { - + protected static final Logger log = Logger.getLogger(FieldIndexQueryReWriter.class); - public static final String INDEXED_TERMS_LIST = "INDEXED_TERMS_LIST"; // comma separated list of indexed terms. + public static final String INDEXED_TERMS_LIST = "INDEXED_TERMS_LIST"; // comma separated list of + // indexed terms. public static Set rangeNodeSet; - + static { - rangeNodeSet = new HashSet(); + rangeNodeSet = new HashSet<>(); rangeNodeSet.add(ParserTreeConstants.JJTLENODE); rangeNodeSet.add(ParserTreeConstants.JJTLTNODE); rangeNodeSet.add(ParserTreeConstants.JJTGENODE); rangeNodeSet.add(ParserTreeConstants.JJTGTNODE); rangeNodeSet = Collections.unmodifiableSet(rangeNodeSet); } - + /* * Given a JEXL Query, rewrite it and return it. - * - * 1. ParseQuery 2. Transform query 3. Refactor query 4. remove non-indexed terms a. remove any tree conflicts b. collapse any branches 7. add normalized - * values 8. adjust for case sensitivity 9. add prefix.. but jexl chokes on null byte + * + * 1. ParseQuery 2. Transform query 3. Refactor query 4. remove non-indexed terms a. remove any + * tree conflicts b. collapse any branches 7. add normalized values 8. adjust for case sensitivity + * 9. add prefix.. but jexl chokes on null byte */ public static void setLogLevel(Level lev) { log.setLevel(lev); } - + /** - * - * @param query - * @param options + * * @return String representation of a given query. 
- * @throws ParseException - * @throws Exception */ - public String removeNonIndexedTermsAndInvalidRanges(String query, Map options) throws ParseException, Exception { + public String removeNonIndexedTermsAndInvalidRanges(String query, Map options) + throws ParseException, Exception { Multimap indexedTerms = parseIndexedTerms(options); RewriterTreeNode node = parseJexlQuery(query); if (log.isDebugEnabled()) { @@ -104,22 +104,19 @@ public class FieldIndexQueryReWriter { node = removeTreeConflicts(node, indexedTerms); node = collapseBranches(node); node = removeNegationViolations(node); - + if (log.isDebugEnabled()) { log.debug("Tree -NonIndexed: " + node.getContents()); } return rebuildQueryFromTree(node); } - + /** - * - * @param query - * @param options + * * @return String representation of a given query. - * @throws ParseException - * @throws Exception */ - public String applyNormalizedTerms(String query, Map options) throws ParseException, Exception { + public String applyNormalizedTerms(String query, Map options) + throws ParseException, Exception { if (log.isDebugEnabled()) { log.debug("applyNormalizedTerms, query: " + query); } @@ -134,16 +131,13 @@ public class FieldIndexQueryReWriter { } return rebuildQueryFromTree(node); } - + /** - * - * @param query - * @param fNameUpper - * @param fValueUpper + * * @return String representation of a given query. - * @throws ParseException */ - public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper) throws ParseException { + public String applyCaseSensitivity(String query, boolean fNameUpper, boolean fValueUpper) + throws ParseException { RewriterTreeNode node = parseJexlQuery(query); if (log.isDebugEnabled()) { log.debug("Tree: " + node.getContents()); @@ -154,7 +148,7 @@ public class FieldIndexQueryReWriter { } return rebuildQueryFromTree(node); } - + private String rebuildQueryFromTree(RewriterTreeNode node) { if (node.isLeaf()) { String fName = node.getFieldName(); @@ -177,7 +171,7 @@ public class FieldIndexQueryReWriter { } return fName + operator + "'" + fValue + "'"; } else { - List parts = new ArrayList(); + List parts = new ArrayList<>(); Enumeration children = node.children(); while (children.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); @@ -195,7 +189,7 @@ public class FieldIndexQueryReWriter { return query; } } - + /* * Don't use this, Jexl currently chokes on null bytes in the query */ @@ -227,7 +221,7 @@ public class FieldIndexQueryReWriter { } return root; } - + /* * */ @@ -236,11 +230,11 @@ public class FieldIndexQueryReWriter { if (log.isDebugEnabled()) { log.debug("transformTreeNode, Equals Node"); } - + Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection values = terms.get(fName); - + for (QueryTerm t : values) { if (null == t || null == t.getValue()) { continue; @@ -248,17 +242,18 @@ public class FieldIndexQueryReWriter { String fValue = t.getValue().toString(); fValue = fValue.replaceAll("'", ""); boolean negated = t.getOperator().equals("!="); - RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated); + RewriterTreeNode child = + new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, fValue, negated); return child; } } } - + if (node.getType().equals(ASTERNode.class) || node.getType().equals(ASTNRNode.class)) { if (log.isDebugEnabled()) { log.debug("transformTreeNode, Regex Node"); } - + Multimap terms = node.getTerms(); for (String fName : terms.keySet()) { Collection 
values = terms.get(fName); @@ -269,14 +264,15 @@ public class FieldIndexQueryReWriter { String fValue = t.getValue().toString(); fValue = fValue.replaceAll("'", ""); boolean negated = node.getType().equals(ASTNRNode.class); - RewriterTreeNode child = new RewriterTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated); + RewriterTreeNode child = + new RewriterTreeNode(ParserTreeConstants.JJTERNODE, fName, fValue, negated); return child; } } } - - if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) || node.getType().equals(ASTGTNode.class) - || node.getType().equals(ASTGENode.class)) { + + if (node.getType().equals(ASTLTNode.class) || node.getType().equals(ASTLENode.class) + || node.getType().equals(ASTGTNode.class) || node.getType().equals(ASTGENode.class)) { if (log.isDebugEnabled()) { log.debug("transformTreeNode, LT/LE/GT/GE node"); } @@ -289,18 +285,20 @@ public class FieldIndexQueryReWriter { } String fValue = t.getValue().toString(); fValue = fValue.replaceAll("'", "").toLowerCase(); - boolean negated = false; // to be negated, must be child of Not, which is handled elsewhere. + boolean negated = false; // to be negated, must be child of Not, which is handled + // elsewhere. int mytype = JexlOperatorConstants.getJJTNodeType(t.getOperator()); RewriterTreeNode child = new RewriterTreeNode(mytype, fName, fValue, negated); return child; } } } - + RewriterTreeNode returnNode = null; - + if (node.getType().equals(ASTAndNode.class) || node.getType().equals(ASTOrNode.class)) { - int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE : ParserTreeConstants.JJTORNODE; + int parentType = node.getType().equals(ASTAndNode.class) ? ParserTreeConstants.JJTANDNODE + : ParserTreeConstants.JJTORNODE; if (log.isDebugEnabled()) { log.debug("transformTreeNode, AND/OR node: " + parentType); } @@ -350,8 +348,9 @@ public class FieldIndexQueryReWriter { } else { returnNode = new RewriterTreeNode(ParserTreeConstants.JJTNOTNODE); } - } else if (node.getType().equals(ASTJexlScript.class) || node.getType().getSimpleName().equals("RootNode")) { - + } else if (node.getType().equals(ASTJexlScript.class) + || node.getType().getSimpleName().equals("RootNode")) { + if (log.isDebugEnabled()) { log.debug("transformTreeNode, ROOT/JexlScript node"); } @@ -378,43 +377,48 @@ public class FieldIndexQueryReWriter { returnNode = new RewriterTreeNode(ParserTreeConstants.JJTJEXLSCRIPT); } } else { - log.error("transformTreeNode, Currently Unsupported Node type: " + node.getClass().getName() + " \t" + node.getType()); + log.error("transformTreeNode, Currently Unsupported Node type: " + node.getClass().getName() + + " \t" + node.getType()); } for (TreeNode child : node.getChildren()) { returnNode.add(transformTreeNode(child)); } - + return returnNode; } - - private RewriterTreeNode removeNonIndexedTerms(RewriterTreeNode root, Multimap indexedTerms) throws Exception { - // public void removeNonIndexedTerms(BooleanLogicTreeNodeJexl myroot, String indexedTerms) throws Exception { + + private RewriterTreeNode removeNonIndexedTerms(RewriterTreeNode root, + Multimap indexedTerms) throws Exception { + // public void removeNonIndexedTerms(BooleanLogicTreeNodeJexl myroot, String indexedTerms) + // throws Exception { if (indexedTerms.isEmpty()) { throw new Exception("removeNonIndexedTerms, indexed Terms empty"); } - + // NOTE: doing a depth first enumeration didn't work when I started // removing nodes halfway through. 
The following method does work, // it's essentially a reverse breadth first traversal. - List nodes = new ArrayList(); + List nodes = new ArrayList<>(); Enumeration bfe = root.breadthFirstEnumeration(); - + while (bfe.hasMoreElements()) { RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); nodes.add(node); } - + // walk backwards for (int i = nodes.size() - 1; i >= 0; i--) { RewriterTreeNode node = nodes.get(i); if (log.isDebugEnabled()) { - log.debug("removeNonIndexedTerms, analyzing node: " + node.toString() + " " + node.printNode()); + log.debug( + "removeNonIndexedTerms, analyzing node: " + node.toString() + " " + node.printNode()); } - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { // If all of your children are gone, AND/OR has no purpose, remove if (node.getChildCount() == 0) { node.removeFromParent(); - + // If AND/OR has only 1 child, attach it to the parent directly. } else if (node.getChildCount() == 1) { RewriterTreeNode p = (RewriterTreeNode) node.getParent(); @@ -432,10 +436,12 @@ public class FieldIndexQueryReWriter { continue; } else { if (log.isDebugEnabled()) { - log.debug("removeNonIndexedTerms, Testing: " + node.getFieldName() + ":" + node.getFieldValue()); + log.debug("removeNonIndexedTerms, Testing: " + node.getFieldName() + ":" + + node.getFieldValue()); } - - if (!indexedTerms.containsKey(node.getFieldName().toString() + ":" + node.getFieldValue().toString())) { + + if (!indexedTerms + .containsKey(node.getFieldName().toString() + ":" + node.getFieldValue().toString())) { if (log.isDebugEnabled()) { log.debug(node.getFieldName() + ":" + node.getFieldValue() + " is NOT indexed"); } @@ -447,11 +453,12 @@ public class FieldIndexQueryReWriter { } } } - + return root; } - - private RewriterTreeNode orNormalizedTerms(RewriterTreeNode myroot, Multimap indexedTerms) throws Exception { + + private RewriterTreeNode orNormalizedTerms(RewriterTreeNode myroot, + Multimap indexedTerms) throws Exception { // we have multimap of FieldName to multiple FieldValues if (indexedTerms.isEmpty()) { throw new Exception("indexed Terms empty"); @@ -460,21 +467,23 @@ public class FieldIndexQueryReWriter { // NOTE: doing a depth first enumeration didn't work when I started // removing nodes halfway through. The following method does work, // it's essentially a reverse breadth first traversal. 
- List nodes = new ArrayList(); + List nodes = new ArrayList<>(); Enumeration bfe = myroot.breadthFirstEnumeration(); - + while (bfe.hasMoreElements()) { RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); nodes.add(node); } - + // walk backwards for (int i = nodes.size() - 1; i >= 0; i--) { RewriterTreeNode node = nodes.get(i); if (log.isDebugEnabled()) { - log.debug("orNormalizedTerms, analyzing node: " + node.toString() + " " + node.printNode()); + log.debug( + "orNormalizedTerms, analyzing node: " + node.toString() + " " + node.printNode()); } - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { continue; } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { if (node.getChildCount() == 0) { @@ -490,7 +499,7 @@ public class FieldIndexQueryReWriter { String fName = node.getFieldName().toString(); String fValue = node.getFieldValue().toString(); if (indexedTerms.containsKey(fName + ":" + fValue)) { - + if (indexedTerms.get(fName + ":" + fValue).size() > 1) { // Replace node with an OR, and make children from the multimap collection node.setType(ParserTreeConstants.JJTORNODE); @@ -500,7 +509,8 @@ public class FieldIndexQueryReWriter { node.setFieldValue(null); Collection values = indexedTerms.get(fName + ":" + fValue); for (String value : values) { - RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, value, neg); + RewriterTreeNode n = + new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, fName, value, neg); node.add(n); } } else if (indexedTerms.get(fName + ":" + fValue).size() == 1) { @@ -511,9 +521,10 @@ public class FieldIndexQueryReWriter { node.setFieldValue(val); } } - + } else { - // throw new Exception("orNormalizedTerms, encountered a non-indexed term: " + node.getFieldName().toString()); + // throw new Exception("orNormalizedTerms, encountered a non-indexed term: " + + // node.getFieldName().toString()); } } } @@ -521,63 +532,68 @@ log.debug("Caught exception in orNormalizedTerms(): " + e); throw new Exception("exception in: orNormalizedTerms"); } - + return myroot; } - + /*** - * We only want to pass ranges on if they meet a very narrow set of conditions. All ranges must be bounded i.e. x between(1,5) so their parent is an AND. We - * will only pass a range if 1. The AND is the direct child of HEAD node 2. The AND is a child of an OR which is a direct child of HEAD node. - * - * If there is a HEAD-AND[x,OR[b,AND[range]]], and you remove the range, this turns the tree into HEAD-AND[X,OR[B]] which becomes HEAD-AND[X,B] which will - * miss entries, so you need to cut out the entire OR at this point and let the positive side of the AND pick it up. + * We only want to pass ranges on if they meet a very narrow set of conditions. All ranges must be + * bounded i.e. x between(1,5) so their parent is an AND. We will only pass a range if 1. The AND + * is the direct child of HEAD node 2. The AND is a child of an OR which is a direct child of HEAD + * node. + * + * If there is a HEAD-AND[x,OR[b,AND[range]]], and you remove the range, this turns the tree into + * HEAD-AND[X,OR[B]] which becomes HEAD-AND[X,B] which will miss entries, so you need to cut out + * the entire OR at this point and let the positive side of the AND pick it up. 
*/ - private RewriterTreeNode removeTreeConflicts(RewriterTreeNode root, Multimap indexedTerms) { + private RewriterTreeNode removeTreeConflicts(RewriterTreeNode root, + Multimap indexedTerms) { if (log.isDebugEnabled()) { log.debug("removeTreeConflicts"); } - + /* - * You can't modify the enumeration, so save it into a list. We want to walk backwards in a breadthFirstEnumeration. So we don't throw null pointers when we - * erase nodes and shorten our list. + * You can't modify the enumeration, so save it into a list. We want to walk backwards in a + * breadthFirstEnumeration. So we don't throw null pointers when we erase nodes and shorten our + * list. */ - List nodeList = new ArrayList(); + List nodeList = new ArrayList<>(); Enumeration nodes = root.breadthFirstEnumeration(); while (nodes.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) nodes.nextElement(); nodeList.add(child); } - + // walk backwards for (int i = nodeList.size() - 1; i >= 0; i--) { RewriterTreeNode node = nodeList.get(i); - + if (node.isRemoval()) { node.removeFromParent(); continue; } - + RewriterTreeNode parent = (RewriterTreeNode) node.getParent(); /* - * All ranges must be bounded! This means the range must be part of an AND, and the parent of AND must be a HEAD node or an OR whose parent is a HEAD - * node. + * All ranges must be bounded! This means the range must be part of an AND, and the parent of + * AND must be a HEAD node or an OR whose parent is a HEAD node. */ - if (node.getType() == ParserTreeConstants.JJTANDNODE - && (node.getLevel() == 1 || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1))) { - + if (node.getType() == ParserTreeConstants.JJTANDNODE && (node.getLevel() == 1 + || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1))) { + if (log.isDebugEnabled()) { log.debug("AND at level 1 or with OR parent at level 1"); } Map rangeMap = getBoundedRangeMap(node); - + // can't modify the enumeration... save children to a list. - List childList = new ArrayList(); + List childList = new ArrayList<>(); Enumeration children = node.children(); while (children.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); childList.add(child); } - + for (int j = childList.size() - 1; j >= 0; j--) { RewriterTreeNode child = childList.get(j); // currently we are not allowing unbounded ranges, so they must sit under an AND node. 
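(The bounded-range rule reflowed in the comment above reduces to a single predicate on the enclosing AND node. The following is a minimal illustrative sketch only, written against the RewriterTreeNode accessors visible in this diff (getType(), getLevel(), getParent()); it is not part of this commit:)

    // Sketch only: a bounded range may pass when its enclosing AND is a direct
    // child of the HEAD node, or a child of an OR that is a direct child of HEAD.
    private boolean boundedRangeMayPass(RewriterTreeNode andNode) {
      RewriterTreeNode parent = (RewriterTreeNode) andNode.getParent();
      return andNode.getType() == ParserTreeConstants.JJTANDNODE
          && (andNode.getLevel() == 1
              || (parent.getType() == ParserTreeConstants.JJTORNODE && parent.getLevel() == 1));
    }

(Any AND that fails this test falls into the "remove all ranges" branch in the hunk that follows.)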
@@ -608,49 +624,52 @@ public class FieldIndexQueryReWriter { if (singleSib) { child.removeFromParent(); } else { - if (indexedTerms.containsKey(child.getFieldName() + ":" + child.getFieldValue())) { + if (indexedTerms + .containsKey(child.getFieldName() + ":" + child.getFieldValue())) { if (log.isDebugEnabled()) { log.debug("removeTreeConflicts, node: " + node.getContents()); } // swap parent AND with an OR node.removeAllChildren(); node.setType(ParserTreeConstants.JJTORNODE); - - Collection values = indexedTerms.get(child.getFieldName() + ":" + child.getFieldValue()); + + Collection values = + indexedTerms.get(child.getFieldName() + ":" + child.getFieldValue()); for (String value : values) { - RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, child.getFieldName(), value, child.isNegated()); + RewriterTreeNode n = new RewriterTreeNode(ParserTreeConstants.JJTEQNODE, + child.getFieldName(), value, child.isNegated()); node.add(n); } if (log.isDebugEnabled()) { log.debug("removeTreeConflicts, node: " + node.getContents()); } - + break; } else { child.removeFromParent(); } - + } } } } - }// end inner for - + } // end inner for + } else { // remove all ranges! if (node.isLeaf()) { continue; } // can't modify the enumeration... - List childList = new ArrayList(); + List childList = new ArrayList<>(); Enumeration children = node.children(); while (children.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); childList.add(child); } - + // walk backwards for (int j = childList.size() - 1; j >= 0; j--) { - + RewriterTreeNode child = childList.get(j); if (log.isDebugEnabled()) { log.debug("removeTreeConflicts, looking at node: " + node); @@ -658,24 +677,25 @@ public class FieldIndexQueryReWriter { if (rangeNodeSet.contains(child.getType())) { // if grandparent is an OR and not top level, mark whole thing for removal. RewriterTreeNode grandParent = (RewriterTreeNode) child.getParent().getParent(); - if (grandParent.getType() == ParserTreeConstants.JJTORNODE && grandParent.getLevel() != 1) { + if (grandParent.getType() == ParserTreeConstants.JJTORNODE + && grandParent.getLevel() != 1) { grandParent.setRemoval(true); } child.removeFromParent(); } } } - - }// end outer for - + + } // end outer for + return root; } - + private RewriterTreeNode removeNegationViolations(RewriterTreeNode node) throws Exception { // Double check the top level node for negation violations // if AND, one child must be positive, if OR, no negatives allowed. RewriterTreeNode one = (RewriterTreeNode) node.getFirstChild(); // Head node has only 1 child. - ArrayList childrenList = new ArrayList(); + ArrayList childrenList = new ArrayList<>(); Enumeration children = one.children(); while (children.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); @@ -702,33 +722,35 @@ public class FieldIndexQueryReWriter { throw new Exception("FieldIndexQueryReWriter: Top level query node cannot be processed."); } } - + return node; } - + // After tree conflicts have been resolved, we can collapse branches where // leaves have been pruned. private RewriterTreeNode collapseBranches(RewriterTreeNode myroot) throws Exception { - + // NOTE: doing a depth first enumeration didn't work when I started // removing nodes halfway through. The following method does work, // it's essentially a reverse breadth first traversal. 
- List nodes = new ArrayList(); + List nodes = new ArrayList<>(); Enumeration bfe = myroot.breadthFirstEnumeration(); - + while (bfe.hasMoreElements()) { RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); nodes.add(node); } - + // walk backwards for (int i = nodes.size() - 1; i >= 0; i--) { RewriterTreeNode node = nodes.get(i); if (log.isDebugEnabled()) { - log.debug("collapseBranches, inspecting node: " + node.toString() + " " + node.printNode()); + log.debug( + "collapseBranches, inspecting node: " + node.toString() + " " + node.printNode()); } - - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { if (node.getChildCount() == 0) { node.removeFromParent(); } else if (node.getChildCount() == 1) { @@ -736,7 +758,7 @@ public class FieldIndexQueryReWriter { RewriterTreeNode c = (RewriterTreeNode) node.getFirstChild(); node.removeFromParent(); p.add(c); - + } } else if (node.getType() == ParserTreeConstants.JJTJEXLSCRIPT) { if (node.getChildCount() == 0) { @@ -746,10 +768,7 @@ public class FieldIndexQueryReWriter { } return myroot; } - - /** - * @param options - */ + public Multimap parseIndexedTerms(Map options) { if (options.get(INDEXED_TERMS_LIST) != null) { Multimap mmap = HashMultimap.create(); @@ -764,9 +783,9 @@ public class FieldIndexQueryReWriter { for (int i = 2; i < parts.length; i++) { // key is original query token, i.e. color:red mmap.put(parts[0] + ":" + parts[1], parts[i]); - + } - + } if (log.isDebugEnabled()) { log.debug("multimap: " + mmap); @@ -778,16 +797,13 @@ public class FieldIndexQueryReWriter { } return null; } - - /** - * @param root - */ + public RewriterTreeNode refactorTree(RewriterTreeNode root) { Enumeration dfe = root.breadthFirstEnumeration(); - + while (dfe.hasMoreElements()) { RewriterTreeNode n = (RewriterTreeNode) dfe.nextElement(); - + if (n.getType() == ParserTreeConstants.JJTNOTNODE) {// BooleanLogicTreeNode.NodeType.NOT) { RewriterTreeNode child = (RewriterTreeNode) n.getChildAt(0); child.setNegated(true); @@ -796,46 +812,48 @@ public class FieldIndexQueryReWriter { parent.add(child); } } - + // cycle through again and distribute nots Enumeration bfe = root.breadthFirstEnumeration(); RewriterTreeNode child; - + while (bfe.hasMoreElements()) { child = (RewriterTreeNode) bfe.nextElement(); - + if (child.isNegated()) { if (child.getChildCount() > 0) { demorganSubTree(child); - + } } } return root; - + } - + private void demorganSubTree(RewriterTreeNode root) { - + root.setNegated(false); // root.setChildrenAllNegated(true); - + if (root.getType() == ParserTreeConstants.JJTANDNODE) {// BooleanLogicTreeNode.NodeType.AND) { // root.setType(BooleanLogicTreeNode.NodeType.OR); root.setType(ParserTreeConstants.JJTORNODE); - } else if (root.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) { + } else if (root.getType() == ParserTreeConstants.JJTORNODE) {// BooleanLogicTreeNode.NodeType.OR) + // { // root.setType(BooleanLogicTreeNode.NodeType.AND); root.setType(ParserTreeConstants.JJTANDNODE); - } else if (root.getType() == ParserTreeConstants.JJTEQNODE || root.getType() == ParserTreeConstants.JJTERNODE) { + } else if (root.getType() == ParserTreeConstants.JJTEQNODE + || root.getType() == ParserTreeConstants.JJTERNODE) { // do nothing } else { log.error("refactorSubTree, node type not supported"); } - + Enumeration children = root.children(); 
RewriterTreeNode child = null; // now distribute the negative - + while (children.hasMoreElements()) { child = (RewriterTreeNode) children.nextElement(); if (child.isNegated()) { @@ -845,32 +863,37 @@ public class FieldIndexQueryReWriter { } } } - - private RewriterTreeNode applyCaseSensitivity(RewriterTreeNode root, boolean fnUpper, boolean fvUpper) { + + private RewriterTreeNode applyCaseSensitivity(RewriterTreeNode root, boolean fnUpper, + boolean fvUpper) { // for each leaf, apply case sensitivity Enumeration bfe = root.breadthFirstEnumeration(); while (bfe.hasMoreElements()) { RewriterTreeNode node = (RewriterTreeNode) bfe.nextElement(); if (node.isLeaf()) { - String fName = fnUpper ? node.getFieldName().toUpperCase() : node.getFieldName().toLowerCase(); + String fName = + fnUpper ? node.getFieldName().toUpperCase() : node.getFieldName().toLowerCase(); node.setFieldName(fName); - - String fValue = fvUpper ? node.getFieldValue().toUpperCase() : node.getFieldValue().toLowerCase(); + + String fValue = + fvUpper ? node.getFieldValue().toUpperCase() : node.getFieldValue().toLowerCase(); node.setFieldValue(fValue); - + } } return root; } - + private Map getBoundedRangeMap(RewriterTreeNode node) { - - if (node.getType() == ParserTreeConstants.JJTANDNODE || node.getType() == ParserTreeConstants.JJTORNODE) { + + if (node.getType() == ParserTreeConstants.JJTANDNODE + || node.getType() == ParserTreeConstants.JJTORNODE) { Enumeration children = node.children(); - Map rangeMap = new HashMap(); + Map rangeMap = new HashMap<>(); while (children.hasMoreElements()) { RewriterTreeNode child = (RewriterTreeNode) children.nextElement(); - if (child.getType() == ParserTreeConstants.JJTLENODE || child.getType() == ParserTreeConstants.JJTLTNODE) { + if (child.getType() == ParserTreeConstants.JJTLENODE + || child.getType() == ParserTreeConstants.JJTLTNODE) { Text fName = new Text(child.getFieldName()); if (rangeMap.containsKey(fName)) { RangeBounds rb = rangeMap.get(fName); @@ -883,8 +906,9 @@ public class FieldIndexQueryReWriter { rb.setLower(new Text(child.getFieldValue())); rangeMap.put(new Text(child.getFieldName()), rb); } - - } else if (child.getType() == ParserTreeConstants.JJTGENODE || child.getType() == ParserTreeConstants.JJTGTNODE) { + + } else if (child.getType() == ParserTreeConstants.JJTGENODE + || child.getType() == ParserTreeConstants.JJTGTNODE) { Text fName = new Text(child.getFieldName()); if (rangeMap.containsKey(fName)) { RangeBounds rb = rangeMap.get(fName); @@ -899,13 +923,14 @@ public class FieldIndexQueryReWriter { } } } - + for (Entry entry : rangeMap.entrySet()) { RangeBounds rb = entry.getValue(); if (rb.getLower() == null || rb.getUpper() == null) { // unbounded range, remove if (log.isDebugEnabled()) { - log.debug("testBoundedRangeExistence: Unbounded Range detected, removing entry from rangeMap"); + log.debug( + "testBoundedRangeExistence: Unbounded Range detected, removing entry from rangeMap"); } rangeMap.remove(entry.getKey()); } @@ -914,15 +939,15 @@ public class FieldIndexQueryReWriter { return rangeMap; } } - + return null; } - + /** * INNER CLASSES */ public class RewriterTreeNode extends DefaultMutableTreeNode { - + private static final long serialVersionUID = 1L; private boolean negated = false; private String fieldName; @@ -930,43 +955,26 @@ public class FieldIndexQueryReWriter { private String operator; private int type; private boolean removal = false; - - /** - * - * @param type - */ + public RewriterTreeNode(int type) { super(); this.type = type; } - - /** - * - * 
@param type - * @param fName - * @param fValue - */ + public RewriterTreeNode(int type, String fName, String fValue) { super(); init(type, fName, fValue); } - - /** - * - * @param type - * @param fName - * @param fValue - * @param negate - */ + public RewriterTreeNode(int type, String fName, String fValue, boolean negate) { super(); init(type, fName, fValue, negate); } - + private void init(int type, String fName, String fValue) { init(type, fName, fValue, false); } - + private void init(int type, String fName, String fValue, boolean negate) { this.type = type; this.fieldName = fName; @@ -977,98 +985,78 @@ log.debug("FN: " + this.fieldName + " FV: " + this.fieldValue + " Op: " + this.operator); } } - + /** * @return The field name. */ public String getFieldName() { return fieldName; } - - /** - * - * @param fieldName - */ + public void setFieldName(String fieldName) { this.fieldName = fieldName; } - + /** - * + * * @return The field value. */ public String getFieldValue() { return fieldValue; } - - /** - * - * @param fieldValue - */ + public void setFieldValue(String fieldValue) { this.fieldValue = fieldValue; } - + /** - * + * * @return true if negated, otherwise false. */ public boolean isNegated() { return negated; } - - /** - * - * @param negated - */ + public void setNegated(boolean negated) { this.negated = negated; } - + /** - * + * * @return The operator. */ public String getOperator() { return operator; } - - /** - * - * @param operator - */ + public void setOperator(String operator) { this.operator = operator; } - + /** - * + * * @return The type. */ public int getType() { return type; } - - /** - * - * @param type - */ + public void setType(int type) { this.type = type; } - + public boolean isRemoval() { return removal; } - + public void setRemoval(boolean removal) { this.removal = removal; } - + public String getContents() { StringBuilder s = new StringBuilder("["); s.append(toString()); - + if (children != null) { Enumeration e = this.children(); while (e.hasMoreElements()) { @@ -1080,9 +1068,9 @@ s.append("]"); return s.toString(); } - + /** - * + * * @return A string representation of the field name and value. */ public String printNode() { @@ -1102,7 +1090,7 @@ s.append("]"); return s.toString(); } - + @Override public String toString() { switch (type) { diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java index aaac3d8..47d6f20 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/QueryEvaluator.java @@ -23,7 +23,6 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Set; - import org.apache.accumulo.examples.wikisearch.function.QueryFunctions; import org.apache.accumulo.examples.wikisearch.jexl.Arithmetic; import org.apache.accumulo.examples.wikisearch.parser.EventFields.FieldValue; @@ -38,24 +37,23 @@ import org.apache.commons.jexl2.parser.ParserTreeConstants; import org.apache.log4j.Level; import org.apache.log4j.Logger; - import com.google.common.collect.Multimap; - /** - * This class evaluates events against a query. The query is passed to the constructor and then parsed. It is evaluated against an event in the evaluate method. + * This class evaluates events against a query. 
The query is passed to the constructor and then + * parsed. It is evaluated against an event in the evaluate method. */ public class QueryEvaluator { - + private static Logger log = Logger.getLogger(QueryEvaluator.class); // According to the JEXL 2.0 docs, the engine is thread-safe. Let's create 1 engine per VM and // cache 128 expressions private static JexlEngine engine = new JexlEngine(null, new Arithmetic(false), null, null); - + static { engine.setSilent(false); engine.setCache(128); - Map functions = new HashMap(); + Map functions = new HashMap<>(); functions.put("f", QueryFunctions.class); engine.setFunctions(functions); } @@ -65,7 +63,7 @@ public class QueryEvaluator { private String modifiedQuery = null; private JexlContext ctx = new MapContext(); private boolean caseInsensitive = true; - + public QueryEvaluator(String query) throws ParseException { this.caseInsensitive = true; // default case insensitive matching. if (caseInsensitive) { @@ -76,7 +74,7 @@ public class QueryEvaluator { parser.execute(query); this.terms = parser.getQueryTerms(); if (caseInsensitive) { - literals = new HashSet(); + literals = new HashSet<>(); for (String lit : parser.getQueryIdentifiers()) { literals.add(lit.toLowerCase()); } @@ -84,7 +82,7 @@ public class QueryEvaluator { this.literals = parser.getQueryIdentifiers(); } } - + public QueryEvaluator(String query, boolean insensitive) throws ParseException { this.caseInsensitive = insensitive; if (this.caseInsensitive) { @@ -94,9 +92,9 @@ public class QueryEvaluator { QueryParser parser = new QueryParser(); parser.execute(query); this.terms = parser.getQueryTerms(); - + if (caseInsensitive) { - literals = new HashSet(); + literals = new HashSet<>(); for (String lit : parser.getQueryIdentifiers()) { literals.add(lit.toLowerCase()); } @@ -104,22 +102,23 @@ public class QueryEvaluator { this.literals = parser.getQueryIdentifiers(); } } - + public String getQuery() { return this.query; } - + public void printLiterals() { for (String s : literals) { System.out.println("literal: " + s); } } - + public void setLevel(Level lev) { log.setLevel(lev); } - - public StringBuilder rewriteQuery(StringBuilder query, String fieldName, Collection fieldValues) { + + public StringBuilder rewriteQuery(StringBuilder query, String fieldName, + Collection fieldValues) { if (log.isDebugEnabled()) { log.debug("rewriteQuery"); } @@ -145,37 +144,42 @@ public class QueryEvaluator { } // Add the array to the context ctx.set(fieldName, values); - + Collection qt = terms.get(fieldName); - + // Add a script to the beginning of the query for this multi-valued field StringBuilder script = new StringBuilder(); script.append("_").append(fieldName).append(" = false;\n"); script.append("for (field : ").append(fieldName).append(") {\n"); - + for (QueryTerm t : qt) { - if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { - script.append("\tif (_").append(fieldName).append(" == false && field ").append(t.getOperator()).append(" ").append(t.getValue()).append(") { \n"); + if (!t.getOperator() + .equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { + script.append("\tif (_").append(fieldName).append(" == false && field ") + .append(t.getOperator()).append(" ").append(t.getValue()).append(") { \n"); } else { - script.append("\tif (_").append(fieldName).append(" == false && ").append(t.getValue().toString().replace(fieldName, "field")).append(") { \n"); + script.append("\tif (_").append(fieldName).append(" == false && ") 
+ .append(t.getValue().toString().replace(fieldName, "field")).append(") { \n"); } script.append("\t\t_").append(fieldName).append(" = true;\n"); script.append("\t}\n"); } script.append("}\n"); - + // Add the script to the beginning of the query query.insert(0, script.toString()); - + StringBuilder newPredicate = new StringBuilder(); newPredicate.append("_").append(fieldName).append(" == true"); - + for (QueryTerm t : qt) { // Find the location of this term in the query StringBuilder predicate = new StringBuilder(); int start = 0; - if (!t.getOperator().equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { - predicate.append(fieldName).append(" ").append(t.getOperator()).append(" ").append(t.getValue()); + if (!t.getOperator() + .equals(JexlOperatorConstants.getOperator(ParserTreeConstants.JJTFUNCTIONNODE))) { + predicate.append(fieldName).append(" ").append(t.getOperator()).append(" ") + .append(t.getValue()); start = query.indexOf(predicate.toString()); } else { predicate.append(t.getValue().toString()); @@ -183,37 +187,36 @@ start = query.indexOf(predicate.toString()); } if (-1 == start) { - log.warn("Unable to find predicate: " + predicate.toString() + " in rewritten query: " + query.toString()); + log.warn("Unable to find predicate: " + predicate.toString() + " in rewritten query: " + + query.toString()); } int length = predicate.length(); - + // Now modify the query to check the value of my.fieldName query.replace(start, start + length, newPredicate.toString()); } - + if (log.isDebugEnabled()) { log.debug("leaving rewriteQuery with: " + query.toString()); } return query; } - + /** * Evaluates the query against an event. - * - * @param eventFields */ public boolean evaluate(EventFields eventFields) { - + this.modifiedQuery = null; boolean rewritten = false; - + // Copy the query StringBuilder q = new StringBuilder(query); // Copy the literals, we are going to remove elements from this set // when they are added to the JEXL context. This will allow us to // determine which items in the query were *NOT* in the data. - HashSet literalsCopy = new HashSet(literals); - + HashSet literalsCopy = new HashSet<>(literals); + // Loop through the event fields and add them to the JexlContext. for (Entry> field : eventFields.asMap().entrySet()) { String fName = field.getKey(); @@ -226,52 +229,55 @@ } else { literalsCopy.remove(fName); } - + // This field may have multiple values. if (field.getValue().size() == 0) { continue; } else if (field.getValue().size() == 1) { // We are explicitly converting these bytes to a String. if (caseInsensitive) { - ctx.set(field.getKey().toLowerCase(), (new String(field.getValue().iterator().next().getValue())).toLowerCase()); + ctx.set(field.getKey().toLowerCase(), + (new String(field.getValue().iterator().next().getValue())).toLowerCase()); } else { ctx.set(field.getKey(), new String(field.getValue().iterator().next().getValue())); } - + } else { // q = queryRewrite(q, field.getKey(), field.getValue()); q = rewriteQuery(q, field.getKey(), field.getValue()); rewritten = true; - }// End of if - - }// End of loop - + } // End of if + + } // End of loop + // For any literals in the query that were not found in the data, add them to the context // with a null value. 
for (String lit : literalsCopy) { ctx.set(lit, null); } - + if (log.isDebugEnabled()) { log.debug("Evaluating query: " + q.toString()); } - + this.modifiedQuery = q.toString(); - + Boolean result = null; if (rewritten) { Script script = engine.createScript(this.modifiedQuery); try { result = (Boolean) script.execute(ctx); } catch (Exception e) { - log.error("Error evaluating script: " + this.modifiedQuery + " against event" + eventFields.toString(), e); + log.error("Error evaluating script: " + this.modifiedQuery + " against event" + + eventFields.toString(), e); } } else { Expression expr = engine.createExpression(this.modifiedQuery); try { result = (Boolean) expr.evaluate(ctx); } catch (Exception e) { - log.error("Error evaluating expression: " + this.modifiedQuery + " against event" + eventFields.toString(), e); + log.error("Error evaluating expression: " + this.modifiedQuery + " against event" + + eventFields.toString(), e); } } if (null != result && result) { @@ -280,9 +286,9 @@ return false; } } // End of method - + /** - * + * * @return rewritten query that was evaluated against the most recent event */ public String getModifiedQuery() { diff --git a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java index 8a5474b..105351d 100644 --- a/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java +++ b/query/src/main/java/org/apache/accumulo/examples/wikisearch/parser/RangeCalculator.java @@ -16,7 +16,6 @@ */ package org.apache.accumulo.examples.wikisearch.parser; - import java.util.HashMap; import java.util.HashSet; import java.util.Map; @@ -62,72 +61,77 @@ import com.google.common.collect.Multimap; import com.google.protobuf.InvalidProtocolBufferException; /** - * This class is used to query the global indices to determine the set of ranges to use when querying the shard table. The RangeCalculator looks at each term - * in the query to determine if it is an equivalence, range, or wildcard comparison, and queries the appropriate index to find the ranges for the terms which are - * then cached. The final set of ranges is computed as the AST is traversed. + * This class is used to query the global indices to determine the set of ranges to use when + * querying the shard table. The RangeCalculator looks at each term in the query to determine if it + * is an equivalence, range, or wildcard comparison, and queries the appropriate index to find the + * ranges for the terms which are then cached. The final set of ranges is computed as the AST is + * traversed. 
*/ public class RangeCalculator extends QueryParser { - + /** * Container used as map keys in this class - * + * */ public static class MapKey implements Comparable { private String fieldName = null; private String fieldValue = null; private String originalQueryValue = null; - + public MapKey(String fieldName, String fieldValue) { super(); this.fieldName = fieldName; this.fieldValue = fieldValue; } - + public String getFieldName() { return fieldName; } - + public String getFieldValue() { return fieldValue; } - + public void setFieldName(String fieldName) { this.fieldName = fieldName; } - + public void setFieldValue(String fieldValue) { this.fieldValue = fieldValue; } - + public String getOriginalQueryValue() { return originalQueryValue; } - + public void setOriginalQueryValue(String originalQueryValue) { this.originalQueryValue = originalQueryValue; } - + @Override public int hashCode() { return new HashCodeBuilder(17, 37).append(fieldName).append(fieldValue).toHashCode(); } - + @Override public String toString() { return this.fieldName + " " + this.fieldValue; } - + @Override public boolean equals(Object other) { - if (other == null) + if (other == null) { return false; + } if (other instanceof MapKey) { MapKey o = (MapKey) other; return (this.fieldName.equals(o.fieldName) && this.fieldValue.equals(o.fieldValue)); - } else + } else { return false; + } } - + + @Override public int compareTo(MapKey o) { int result = this.fieldName.compareTo(o.fieldName); if (result != 0) { @@ -136,87 +140,88 @@ public class RangeCalculator extends QueryParser { return result; } } - + } - + /** * Container used to hold the lower and upper bound of a range - * + * */ public static class RangeBounds { private String originalLower = null; private Text lower = null; private String originalUpper = null; private Text upper = null; - + public Text getLower() { return lower; } - + public Text getUpper() { return upper; } - + public void setLower(Text lower) { this.lower = lower; } - + public void setUpper(Text upper) { this.upper = upper; } - + public String getOriginalLower() { return originalLower; } - + public String getOriginalUpper() { return originalUpper; } - + public void setOriginalLower(String originalLower) { this.originalLower = originalLower; } - + public void setOriginalUpper(String originalUpper) { this.originalUpper = originalUpper; } } - + /** - * - * Object that is used to hold ranges found in the index. Subclasses may compute the final range set in various ways. + * + * Object that is used to hold ranges found in the index. Subclasses may compute the final range + * set in various ways. 
*/ protected static class TermRange implements Comparable { - + private String fieldName = null; private Object fieldValue = null; - private Set ranges = new TreeSet(); - + private Set ranges = new TreeSet<>(); + public TermRange(String name, Object fieldValue) { this.fieldName = name; this.fieldValue = fieldValue; } - + public String getFieldName() { return this.fieldName; } - + public Object getFieldValue() { return this.fieldValue; } - + public void addAll(Set r) { ranges.addAll(r); } - + public void add(Range r) { ranges.add(r); } - + public Set getRanges() { return ranges; } - + @Override public String toString() { ToStringBuilder tsb = new ToStringBuilder(this); @@ -225,7 +230,8 @@ public class RangeCalculator extends QueryParser { tsb.append("ranges", ranges); return tsb.toString(); } - + + @Override public int compareTo(TermRange o) { int result = this.fieldName.compareTo(o.fieldName); if (result == 0) { @@ -235,7 +241,7 @@ public class RangeCalculator extends QueryParser { } } } - + /** * Object used to store context information as the AST is being traversed. */ @@ -246,11 +252,11 @@ public class RangeCalculator extends QueryParser { TermRange lastRange = null; String lastProcessedTerm = null; } - + protected static Logger log = Logger.getLogger(RangeCalculator.class); private static String WILDCARD = ".*"; private static String SINGLE_WILDCARD = "\\."; - + protected Connector c; protected Authorizations auths; protected Multimap indexedTerms; @@ -258,33 +264,23 @@ public class RangeCalculator extends QueryParser { protected String indexTableName; protected String reverseIndexTableName; protected int queryThreads = 8; - + /* final results of index lookups, ranges for the shard table */ protected Set result = null; /* map of field names to values found in the index */ protected Multimap indexEntries = HashMultimap.create(); /* map of value in the index to the original query values */ - protected Map indexValues = new HashMap(); + protected Map indexValues = new HashMap<>(); /* map of values in the query to map keys used */ protected Multimap originalQueryValues = HashMultimap.create(); /* map of field name to cardinality */ - protected Map termCardinalities = new HashMap(); + protected Map termCardinalities = new HashMap<>(); /* cached results of all ranges found global index lookups */ - protected Map globalIndexResults = new HashMap(); - - /** - * - * @param c - * @param auths - * @param indexedTerms - * @param terms - * @param query - * @param logic - * @param typeFilter - * @throws ParseException - */ - public void execute(Connector c, Authorizations auths, Multimap indexedTerms, Multimap terms, String query, - AbstractQueryLogic logic, Set typeFilter) throws ParseException { + protected Map globalIndexResults = new HashMap<>(); + + public void execute(Connector c, Authorizations auths, Multimap indexedTerms, + Multimap terms, String query, AbstractQueryLogic logic, + Set typeFilter) throws ParseException { super.execute(query); this.c = c; this.auths = auths; @@ -293,21 +289,26 @@ public class RangeCalculator extends QueryParser { this.indexTableName = logic.getIndexTableName(); this.reverseIndexTableName = logic.getReverseIndexTableName(); this.queryThreads = logic.getQueryThreads(); - - Map> indexRanges = new HashMap>(); - Map> trailingWildcardRanges = new HashMap>(); - Map> leadingWildcardRanges = new HashMap>(); - Map rangeMap = new HashMap(); - + + Map> indexRanges = new HashMap<>(); + Map> trailingWildcardRanges = new HashMap<>(); + Map> leadingWildcardRanges = new 
HashMap<>(); + Map rangeMap = new HashMap<>(); + // Here we iterate over all of the terms in the query to determine if they are an equivalence, // wildcard, or range type operator for (Entry entry : terms.entries()) { if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTEQNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTERNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLTNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLENode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGTNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGENode.class))) { + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTERNode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTLTNode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTLENode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTGTNode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTGENode.class))) { // If this term is not in the set of indexed terms, then bail if (!indexedTerms.containsKey(entry.getKey())) { termCardinalities.put(entry.getKey().toUpperCase(), 0L); @@ -319,67 +320,81 @@ public class RangeCalculator extends QueryParser { continue; } // In the case where we are looking for 'null', then skip. - if (null == entry.getValue().getValue() || ((String) entry.getValue().getValue()).equals("null")) { + if (null == entry.getValue().getValue() + || ((String) entry.getValue().getValue()).equals("null")) { termCardinalities.put(entry.getKey().toUpperCase(), 0L); continue; } - + // Remove the begin and end ' marks String value = null; - if (((String) entry.getValue().getValue()).startsWith("'") && ((String) entry.getValue().getValue()).endsWith("'")) - value = ((String) entry.getValue().getValue()).substring(1, ((String) entry.getValue().getValue()).length() - 1); - else + if (((String) entry.getValue().getValue()).startsWith("'") + && ((String) entry.getValue().getValue()).endsWith("'")) { + value = ((String) entry.getValue().getValue()).substring(1, + ((String) entry.getValue().getValue()).length() - 1); + } else { value = (String) entry.getValue().getValue(); + } // The entries in the index are normalized for (Normalizer normalizer : indexedTerms.get(entry.getKey())) { String normalizedFieldValue = normalizer.normalizeFieldValue(null, value); Text fieldValue = new Text(normalizedFieldValue); Text fieldName = new Text(entry.getKey().toUpperCase()); - + // EQUALS - if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTEQNode.class))) { + if (entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTEQNode.class))) { Key startRange = new Key(fieldValue, fieldName); Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true); - + MapKey key = new MapKey(fieldName.toString(), fieldValue.toString()); key.setOriginalQueryValue(value); this.originalQueryValues.put(value, key); - if (!indexRanges.containsKey(key)) + if (!indexRanges.containsKey(key)) { indexRanges.put(key, new HashSet()); + } indexRanges.get(key).add(r); // WILDCARD - } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTERNode.class))) { - // This is a wildcard 
query using regex. We can only support leading and trailing wildcards at this time. Leading - // wildcards will need to be reversed and sent to the global reverse index. Trailing wildcard queries will be sent to the - // global index. In all cases, the range for the wildcard will be the range of possible UNICODE codepoints, hex 0 to 10FFFF. + } else if (entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTERNode.class))) { + // This is a wildcard query using regex. We can only support leading and trailing + // wildcards at this time. Leading + // wildcards will need to be reversed and sent to the global reverse index. Trailing + // wildcard queries will be sent to the + // global index. In all cases, the range for the wildcard will be the range of possible + // UNICODE codepoints, hex 0 to 10FFFF. int loc = normalizedFieldValue.indexOf(WILDCARD); - if (-1 == loc) + if (-1 == loc) { loc = normalizedFieldValue.indexOf(SINGLE_WILDCARD); + } if (-1 == loc) { // Then no wildcard in the query? Treat like the equals case above. Key startRange = new Key(fieldValue, fieldName); Range r = new Range(startRange, true, startRange.followingKey(PartialKey.ROW), true); - + MapKey key = new MapKey(fieldName.toString(), fieldValue.toString()); key.setOriginalQueryValue(value); this.originalQueryValues.put(value, key); - if (!indexRanges.containsKey(key)) + if (!indexRanges.containsKey(key)) { indexRanges.put(key, new HashSet()); + } indexRanges.get(key).add(r); } else { if (loc == 0) { - // Then we have a leading wildcard, reverse the term and use the global reverse index. + // Then we have a leading wildcard, reverse the term and use the global reverse + // index. StringBuilder buf = new StringBuilder(normalizedFieldValue.substring(2)); normalizedFieldValue = buf.reverse().toString(); Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName); Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName); Range r = new Range(startRange, true, endRange, true); - + MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue); key.setOriginalQueryValue(value); this.originalQueryValues.put(value, key); - if (!leadingWildcardRanges.containsKey(key)) + if (!leadingWildcardRanges.containsKey(key)) { leadingWildcardRanges.put(key, new HashSet()); + } leadingWildcardRanges.get(key).add(r); } else if (loc == (normalizedFieldValue.length() - 2)) { normalizedFieldValue = normalizedFieldValue.substring(0, loc); @@ -387,39 +402,48 @@ Key startRange = new Key(new Text(normalizedFieldValue + "\u0000"), fieldName); Key endRange = new Key(new Text(normalizedFieldValue + "\u10FFFF"), fieldName); Range r = new Range(startRange, true, endRange, true); - + MapKey key = new MapKey(fieldName.toString(), normalizedFieldValue); key.setOriginalQueryValue(value); this.originalQueryValues.put(value, key); - if (!trailingWildcardRanges.containsKey(key)) + if (!trailingWildcardRanges.containsKey(key)) { trailingWildcardRanges.put(key, new HashSet()); + } trailingWildcardRanges.get(key).add(r); } else { - // throw new RuntimeException("Unsupported wildcard location. 
Only trailing or + // leading wildcards are supported: " + normalizedFieldValue); + // Don't throw an exception, there must be a wildcard in the query, we'll treat it + // as a filter on the results since it is not // leading or trailing. } } // RANGES - } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGTNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTGENode.class))) { + } else if (entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTGTNode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTGENode.class))) { // Then we have a lower bound to a range query - if (!rangeMap.containsKey(fieldName)) + if (!rangeMap.containsKey(fieldName)) { rangeMap.put(fieldName, new RangeBounds()); + } rangeMap.get(fieldName).setLower(fieldValue); rangeMap.get(fieldName).setOriginalLower(value); - } else if (entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLTNode.class)) - || entry.getValue().getOperator().equals(JexlOperatorConstants.getOperator(ASTLENode.class))) { + } else if (entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTLTNode.class)) + || entry.getValue().getOperator() + .equals(JexlOperatorConstants.getOperator(ASTLENode.class))) { // Then we have an upper bound to a range query - if (!rangeMap.containsKey(fieldName)) + if (!rangeMap.containsKey(fieldName)) { rangeMap.put(fieldName, new RangeBounds()); + } rangeMap.get(fieldName).setUpper(fieldValue); rangeMap.get(fieldName).setOriginalUpper(value); } } } } - + // INDEX RANGE QUERY // Now that we have figured out the range bounds, create the index ranges. for (Entry entry : rangeMap.entrySet()) { @@ -437,156 +461,169 @@ public class RangeCalculator extends QueryParser { Key startRange = new Key(lower, entry.getKey()); Key endRange = new Key(upper, entry.getKey()); Range r = new Range(startRange, true, endRange, true); - // For the range queries we need to query the global index and then handle the results a little differently. - Map> ranges = new HashMap>(); + // For the range queries we need to query the global index and then handle the results a + // little differently. + Map> ranges = new HashMap<>(); MapKey key = new MapKey(entry.getKey().toString(), entry.getValue().getLower().toString()); key.setOriginalQueryValue(entry.getValue().getOriginalLower().toString()); this.originalQueryValues.put(entry.getValue().getOriginalLower().toString(), key); ranges.put(key, new HashSet()); ranges.get(key).add(r); - + // Now query the global index and override the field value used in the results map try { - Map lowerResults = queryGlobalIndex(ranges, entry.getKey().toString(), this.indexTableName, false, key, typeFilter); + Map lowerResults = queryGlobalIndex(ranges, entry.getKey().toString(), + this.indexTableName, false, key, typeFilter); // Add the results to the global index results for both the upper and lower field values. 
- Map upperResults = new HashMap(); + Map upperResults = new HashMap<>(); for (Entry e : lowerResults.entrySet()) { - MapKey key2 = new MapKey(e.getKey().getFieldName(), entry.getValue().getUpper().toString()); + MapKey key2 = + new MapKey(e.getKey().getFieldName(), entry.getValue().getUpper().toString()); key2.setOriginalQueryValue(entry.getValue().getOriginalUpper().toString()); upperResults.put(key2, e.getValue()); this.originalQueryValues.put(entry.getValue().getOriginalUpper(), key2); - + } - + this.globalIndexResults.putAll(lowerResults); this.globalIndexResults.putAll(upperResults); - + } catch (TableNotFoundException e) { log.error("index table not found", e); throw new RuntimeException(" index table not found", e); } } else { - log.warn("Unbounded range detected, not querying index for it. Field " + entry.getKey().toString() + " in query: " + query); + log.warn("Unbounded range detected, not querying index for it. Field " + + entry.getKey().toString() + " in query: " + query); } } // Now that we have calculated all of the ranges, query the global index. try { - + // Query for the trailing wildcards if we have any for (Entry> trailing : trailingWildcardRanges.entrySet()) { - Map> m = new HashMap>(); + Map> m = new HashMap<>(); m.put(trailing.getKey(), trailing.getValue()); - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("Ranges for Wildcard Global Index query: " + m.toString()); - this.globalIndexResults.putAll(queryGlobalIndex(m, trailing.getKey().getFieldName(), this.indexTableName, false, trailing.getKey(), typeFilter)); + } + this.globalIndexResults.putAll(queryGlobalIndex(m, trailing.getKey().getFieldName(), + this.indexTableName, false, trailing.getKey(), typeFilter)); } - + // Query for the leading wildcards if we have any for (Entry> leading : leadingWildcardRanges.entrySet()) { - Map> m = new HashMap>(); + Map> m = new HashMap<>(); m.put(leading.getKey(), leading.getValue()); - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("Ranges for Wildcard Global Reverse Index query: " + m.toString()); - this.globalIndexResults.putAll(queryGlobalIndex(m, leading.getKey().getFieldName(), this.reverseIndexTableName, true, leading.getKey(), typeFilter)); + } + this.globalIndexResults.putAll(queryGlobalIndex(m, leading.getKey().getFieldName(), + this.reverseIndexTableName, true, leading.getKey(), typeFilter)); } - + // Query for the equals case for (Entry> equals : indexRanges.entrySet()) { - Map> m = new HashMap>(); + Map> m = new HashMap<>(); m.put(equals.getKey(), equals.getValue()); - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { log.debug("Ranges for Global Index query: " + m.toString()); - this.globalIndexResults.putAll(queryGlobalIndex(m, equals.getKey().getFieldName(), this.indexTableName, false, equals.getKey(), typeFilter)); + } + this.globalIndexResults.putAll(queryGlobalIndex(m, equals.getKey().getFieldName(), + this.indexTableName, false, equals.getKey(), typeFilter)); } } catch (TableNotFoundException e) { log.error("index table not found", e); throw new RuntimeException(" index table not found", e); } - - if (log.isDebugEnabled()) + + if (log.isDebugEnabled()) { log.debug("Ranges from Global Index query: " + globalIndexResults.toString()); - + } + // Now traverse the AST EvaluationContext ctx = new EvaluationContext(); this.getAST().childrenAccept(this, ctx); - + if (ctx.lastRange.getRanges().size() == 0) { log.debug("No resulting range set"); } else { - if (log.isDebugEnabled()) + if (log.isDebugEnabled()) { 
log.debug("Setting range results to: " + ctx.lastRange.getRanges().toString()); + } this.result = ctx.lastRange.getRanges(); } } - + /** - * + * * @return set of ranges to use for the shard table */ public Set getResult() { return result; } - + /** - * + * * @return map of field names to index field values */ public Multimap getIndexEntries() { return indexEntries; } - + public Map getIndexValues() { return indexValues; } - + /** - * + * * @return Cardinality for each field name. */ public Map getTermCardinalities() { return termCardinalities; } - + /** - * - * @param indexRanges - * @param tableName + * * @param isReverse * switch that determines whether or not to reverse the results * @param override * mapKey for wildcard and range queries that specify which mapkey to use in the results * @param typeFilter * - optional list of datatypes - * @throws TableNotFoundException */ - protected Map queryGlobalIndex(Map> indexRanges, String specificFieldName, String tableName, boolean isReverse, - MapKey override, Set typeFilter) throws TableNotFoundException { - + protected Map queryGlobalIndex(Map> indexRanges, + String specificFieldName, String tableName, boolean isReverse, MapKey override, + Set typeFilter) throws TableNotFoundException { + // The results map where the key is the field name and field value and the // value is a set of ranges. The mapkey will always be the field name // and field value that was passed in the original query. The TermRange // will contain the field name and field value found in the index. - Map results = new HashMap(); - + Map results = new HashMap<>(); + // Seed the results map and create the range set for the batch scanner - Set rangeSuperSet = new HashSet(); + Set rangeSuperSet = new HashSet<>(); for (Entry> entry : indexRanges.entrySet()) { rangeSuperSet.addAll(entry.getValue()); TermRange tr = new TermRange(entry.getKey().getFieldName(), entry.getKey().getFieldValue()); - if (null == override) + if (null == override) { results.put(entry.getKey(), tr); - else + } else { results.put(override, tr); + } + } + + if (log.isDebugEnabled()) { + log.debug("Querying global index table: " + tableName + ", range: " + rangeSuperSet.toString() + + " colf: " + specificFieldName); } - - if (log.isDebugEnabled()) - log.debug("Querying global index table: " + tableName + ", range: " + rangeSuperSet.toString() + " colf: " + specificFieldName); BatchScanner bs = this.c.createBatchScanner(tableName, this.auths, this.queryThreads); bs.setRanges(rangeSuperSet); if (null != specificFieldName) { bs.fetchColumnFamily(new Text(specificFieldName)); } - + for (Entry entry : bs) { if (log.isDebugEnabled()) { log.debug("Index entry: " + entry.getKey().toString()); @@ -598,7 +635,7 @@ public class RangeCalculator extends QueryParser { StringBuilder buf = new StringBuilder(entry.getKey().getRow().toString()); fieldValue = buf.reverse().toString(); } - + String fieldName = entry.getKey().getColumnFamily().toString(); // Get the shard id and datatype from the colq String colq = entry.getKey().getColumnQualifier().toString(); @@ -612,8 +649,9 @@ public class RangeCalculator extends QueryParser { shardId = colq; } // Skip this entry if the type is not correct - if (null != datatype && null != typeFilter && !typeFilter.contains(datatype)) + if (null != datatype && null != typeFilter && !typeFilter.contains(datatype)) { continue; + } // Parse the UID.List object from the value Uid.List uidList = null; try { @@ -622,7 +660,7 @@ public class RangeCalculator extends QueryParser { // Don't add UID 
information, at least we know what shards // it is located in. } - + // Add the count for this shard to the total count for the term. long count = 0; Long storedCount = termCardinalities.get(fieldName); @@ -633,39 +671,43 @@ public class RangeCalculator extends QueryParser { } termCardinalities.put(fieldName, count); this.indexEntries.put(fieldName, fieldValue); - - if (null == override) + + if (null == override) { this.indexValues.put(fieldValue, fieldValue); - else + } else { this.indexValues.put(fieldValue, override.getOriginalQueryValue()); - + } + // Create the keys Text shard = new Text(shardId); if (uidList.getIGNORE()) { // Then we create a scan range that is the entire shard - if (null == override) + if (null == override) { results.get(new MapKey(fieldName, fieldValue)).add(new Range(shard)); - else + } else { results.get(override).add(new Range(shard)); + } } else { // We should have UUIDs, create event ranges for (String uuid : uidList.getUIDList()) { Text cf = new Text(datatype); TextUtil.textAppend(cf, uuid); Key startKey = new Key(shard, cf); - Key endKey = new Key(shard, new Text(cf.toString() + EvaluatingIterator.NULL_BYTE_STRING)); + Key endKey = + new Key(shard, new Text(cf.toString() + EvaluatingIterator.NULL_BYTE_STRING)); Range eventRange = new Range(startKey, true, endKey, false); - if (null == override) + if (null == override) { results.get(new MapKey(fieldName, fieldValue)).add(eventRange); - else + } else { results.get(override).add(eventRange); + } } } } bs.close(); return results; } - + @Override public Object visit(ASTOrNode node, Object data) { boolean previouslyInOrContext = false; @@ -680,31 +722,38 @@ public class RangeCalculator extends QueryParser { // Process both sides of this node. Left branch first node.jjtGetChild(0).jjtAccept(this, ctx); Long leftCardinality = this.termCardinalities.get(ctx.lastProcessedTerm); - if (null == leftCardinality) + if (null == leftCardinality) { leftCardinality = 0L; + } TermRange leftRange = ctx.lastRange; - if (log.isDebugEnabled()) - log.debug("[OR-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality + ", ranges: " + leftRange.getRanges().size()); - + if (log.isDebugEnabled()) { + log.debug("[OR-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality + + ", ranges: " + leftRange.getRanges().size()); + } + // Process the right branch node.jjtGetChild(1).jjtAccept(this, ctx); Long rightCardinality = this.termCardinalities.get(ctx.lastProcessedTerm); - if (null == rightCardinality) + if (null == rightCardinality) { rightCardinality = 0L; + } TermRange rightRange = ctx.lastRange; - if (log.isDebugEnabled()) - log.debug("[OR-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality + ", ranges: " + rightRange.getRanges().size()); - + if (log.isDebugEnabled()) { + log.debug("[OR-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality + + ", ranges: " + rightRange.getRanges().size()); + } + // reset the state - if (null != data && !previouslyInOrContext) + if (null != data && !previouslyInOrContext) { ctx.inOrContext = false; + } // Add the ranges for the left and right branches to a TreeSet to sort them - Set ranges = new TreeSet(); + Set ranges = new TreeSet<>(); ranges.addAll(leftRange.getRanges()); ranges.addAll(rightRange.getRanges()); // Now create the union set - Set shardsAdded = new HashSet(); - Set returnSet = new HashSet(); + Set shardsAdded = new HashSet<>(); + Set returnSet = new HashSet<>(); for (Range r : ranges) { if 
   @Override
   public Object visit(ASTAndNode node, Object data) {
     boolean previouslyInAndContext = false;
@@ -744,44 +795,53 @@ public class RangeCalculator extends QueryParser {
     node.jjtGetChild(0).jjtAccept(this, ctx);
     String leftTerm = ctx.lastProcessedTerm;
     Long leftCardinality = this.termCardinalities.get(leftTerm);
-    if (null == leftCardinality)
+    if (null == leftCardinality) {
       leftCardinality = 0L;
+    }
     TermRange leftRange = ctx.lastRange;
-    if (log.isDebugEnabled())
-      log.debug("[AND-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality + ", ranges: " + leftRange.getRanges().size());
-    
+    if (log.isDebugEnabled()) {
+      log.debug("[AND-left] term: " + ctx.lastProcessedTerm + ", cardinality: " + leftCardinality
+          + ", ranges: " + leftRange.getRanges().size());
+    }
+
     // Process the right branch
     node.jjtGetChild(1).jjtAccept(this, ctx);
     String rightTerm = ctx.lastProcessedTerm;
     Long rightCardinality = this.termCardinalities.get(rightTerm);
-    if (null == rightCardinality)
+    if (null == rightCardinality) {
       rightCardinality = 0L;
+    }
     TermRange rightRange = ctx.lastRange;
-    if (log.isDebugEnabled())
-      log.debug("[AND-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality + ", ranges: " + rightRange.getRanges().size());
-    
+    if (log.isDebugEnabled()) {
+      log.debug("[AND-right] term: " + ctx.lastProcessedTerm + ", cardinality: " + rightCardinality
+          + ", ranges: " + rightRange.getRanges().size());
+    }
+
     // reset the state
-    if (null != data && !previouslyInAndContext)
+    if (null != data && !previouslyInAndContext) {
       ctx.inAndContext = false;
-    
+    }
+
     long card = 0L;
     TermRange andRange = new TermRange("AND_RESULT", "foo");
     if ((leftCardinality > 0 && leftCardinality <= rightCardinality) || rightCardinality == 0) {
       card = leftCardinality;
       andRange.addAll(leftRange.getRanges());
-    } else if ((rightCardinality > 0 && rightCardinality <= leftCardinality) || leftCardinality == 0) {
+    } else if ((rightCardinality > 0 && rightCardinality <= leftCardinality)
+        || leftCardinality == 0) {
       card = rightCardinality;
       andRange.addAll(rightRange.getRanges());
     }
-    if (log.isDebugEnabled())
+    if (log.isDebugEnabled()) {
       log.debug("[AND] results: " + andRange.getRanges().toString());
+    }
     ctx.lastRange = andRange;
     ctx.lastProcessedTerm = "AND_RESULT";
     this.termCardinalities.put("AND_RESULT", card);
-    
+
     return null;
   }
-  
+
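The branch above drives the intersection: keep the ranges of the side with the smaller non-zero cardinality, and fall back to the other side when one cardinality is zero. Condensed into a standalone method (the name is illustrative; plain longs stand in for the TermRange bookkeeping):

    // Returns the cardinality the AND visitor keeps: the smaller non-zero side
    // wins, and a zero (unknown/empty) side yields to the other side's ranges.
    static long intersectionCardinality(long leftCardinality, long rightCardinality) {
      if ((leftCardinality > 0 && leftCardinality <= rightCardinality) || rightCardinality == 0) {
        return leftCardinality;
      } else if ((rightCardinality > 0 && rightCardinality <= leftCardinality)
          || leftCardinality == 0) {
        return rightCardinality;
      }
      return 0L; // unreachable for non-negative inputs
    }

Scanning the rarer term's ranges and filtering the other term at evaluation time touches fewer entries than scanning both sides.
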
   @Override
   public Object visit(ASTEQNode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -790,38 +850,44 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
+      ranges = new TermRange(fieldName.toString(), term.getValue());
+    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
@@ -829,7 +895,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
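Before the index lookup, the visitor strips the surrounding single quotes a JEXL string literal carries. A standalone equivalent (the extra length guard against a lone quote is an addition of this sketch, not in the diff):

    // Remove the begin and end ' marks from a JEXL string literal.
    static String stripQuotes(String value) {
      if (value.length() >= 2 && value.startsWith("'") && value.endsWith("'")) {
        return value.substring(1, value.length() - 1);
      }
      return value;
    }
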
   @Override
   public Object visit(ASTNENode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -838,20 +904,24 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = true;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    if (negated)
+    if (negated) {
       negatedTerms.add(fieldName.toString());
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    }
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // We can only use the global index for equality, put in fake results
     if (null != data && data instanceof EvaluationContext) {
@@ -862,7 +932,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
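Note the inverted default above: a != node starts out negated, and an enclosing NOT context flips whichever default applies, so !(F != V) is effectively an equality test. The rule in isolation (method name is illustrative):

    // EQ-style visitors pass defaultNegated = false; the NE visitor passes true.
    static boolean effectiveNegation(boolean defaultNegated, boolean inNotContext) {
      return inNotContext ? !defaultNegated : defaultNegated;
    }

For example, effectiveNegation(true, true) is false, which is why the NE visitor only records the field in negatedTerms when the final flag is still true.
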
   @Override
   public Object visit(ASTLTNode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -871,38 +941,44 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
+      ranges = new TermRange(fieldName.toString(), term.getValue());
    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
@@ -910,7 +986,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
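The LT visitor here, and the GT, LE, GE, and ER visitors that follow, repeat the EQ lookup almost line for line. A hypothetical helper, not part of this commit, shows the shared shape (parameter types are inferred from the calls in the diff; MapKey and TermRange are the wikisearch classes used above):

    import java.util.Map;

    import com.google.common.collect.Multimap;

    public class LookupSketch {
      // Centralizes the cached-range lookup the comparison visitors repeat.
      static TermRange lookupCachedRanges(Multimap<String,MapKey> originalQueryValues,
          Map<MapKey,TermRange> globalIndexResults, String fieldName, String termValue) {
        TermRange ranges = null;
        for (MapKey key : originalQueryValues.get(termValue)) {
          if (key.getFieldName().equalsIgnoreCase(fieldName)) {
            ranges = globalIndexResults.get(key);
          }
        }
        // fall back to an empty TermRange so callers can skip the null check
        return (null != ranges) ? ranges : new TermRange(fieldName, termValue);
      }
    }
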
   @Override
   public Object visit(ASTGTNode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -919,38 +995,44 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
      ranges = new TermRange(fieldName.toString(), term.getValue());
+    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
@@ -958,7 +1040,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
   @Override
   public Object visit(ASTLENode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -967,38 +1049,44 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
      ranges = new TermRange(fieldName.toString(), term.getValue());
+    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
@@ -1006,7 +1094,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
   @Override
   public Object visit(ASTGENode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -1015,38 +1103,44 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
      ranges = new TermRange(fieldName.toString(), term.getValue());
+    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
@@ -1054,7 +1148,7 @@ public class RangeCalculator extends QueryParser {
     }
     return null;
   }
-  
+
   @Override
   public Object visit(ASTERNode node, Object data) {
     StringBuilder fieldName = new StringBuilder();
@@ -1063,47 +1157,53 @@ public class RangeCalculator extends QueryParser {
     Object left = node.jjtGetChild(0).jjtAccept(this, data);
     Object right = node.jjtGetChild(1).jjtAccept(this, data);
     // Ignore functions in the query
-    if (left instanceof FunctionResult || right instanceof FunctionResult)
+    if (left instanceof FunctionResult || right instanceof FunctionResult) {
       return null;
+    }
     decodeResults(left, right, fieldName, value);
     // We need to check to see if we are in a NOT context. If so,
     // then we need to reverse the negation.
     boolean negated = false;
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
-      if (ctx.inNotContext)
+      if (ctx.inNotContext) {
         negated = !negated;
+      }
     }
-    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()), value.getObject());
+    QueryTerm term = new QueryTerm(negated, JexlOperatorConstants.getOperator(node.getClass()),
+        value.getObject());
     termsCopy.put(fieldName.toString(), term);
     // Get the terms from the global index
     // Remove the begin and end ' marks
     String termValue = null;
-    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'"))
+    if (((String) term.getValue()).startsWith("'") && ((String) term.getValue()).endsWith("'")) {
       termValue = ((String) term.getValue()).substring(1, ((String) term.getValue()).length() - 1);
-    else
+    } else {
       termValue = (String) term.getValue();
+    }
     // Get the values found in the index for this query term
     TermRange ranges = null;
     for (MapKey key : this.originalQueryValues.get(termValue)) {
       if (key.getFieldName().equalsIgnoreCase(fieldName.toString())) {
         ranges = this.globalIndexResults.get(key);
-        if (log.isDebugEnabled())
+        if (log.isDebugEnabled()) {
           log.debug("Results for cached index ranges for key: " + key + " are " + ranges);
+        }
       }
     }
     // If no result for this field name and value, then add empty range
-    if (null == ranges)
-      ranges = new TermRange(fieldName.toString(), (String) term.getValue());
+    if (null == ranges) {
      ranges = new TermRange(fieldName.toString(), term.getValue());
+    }
     if (null != data && data instanceof EvaluationContext) {
       EvaluationContext ctx = (EvaluationContext) data;
       ctx.lastRange = ranges;
       ctx.lastProcessedTerm = fieldName.toString();
     }
-    
+
     return null;
   }
-  

... 1067 lines suppressed ...
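Every visitor above communicates through the same mutable context: leaf nodes publish their ranges via ctx.lastRange and their term name via ctx.lastProcessedTerm, and the enclosing AND/OR visitor consumes and replaces them. A sketch of the fields this diff implies (types are inferred; the real class in the suppressed portion likely carries more):

    // State threaded through the JEXL parse-tree walk, as used in the diff.
    class EvaluationContext {
      boolean inOrContext;   // true while visiting children of an OR node
      boolean inAndContext;  // true while visiting children of an AND node
      boolean inNotContext;  // flips the negation default of leaf nodes
      TermRange lastRange;        // ranges produced by the last visited node
      String lastProcessedTerm;   // key into termCardinalities for that node
    }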