lucene-java-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "McKinley, James T" <james.mckin...@cengage.com>
Subject ToChildBlockJoinQuery question
Date Wed, 21 Jan 2015 15:00:22 GMT
Hi,

I'm attempting to use ToChildBlockJoinQuery in Lucene 4.8.1 by following Mike McCandless'
blog post:

http://blog.mikemccandless.com/2012/01/searching-relational-content-with.html

I have a set of child documents which are named works and a set of parent documents which
are named persons that are the creators of the named works.  The parent document has a nationality
and the child document does not.  I want to query the children (named works) limiting by the
nationality of the parent (named person).  I've indexed the documents as follows (I'm pulling
the docs from an existing index):

	private void createNamedWorkIndex(String srcIndexPath, String destIndexPath) throws IOException
{
		FSDirectory srcDir = FSDirectory.open(new File(srcIndexPath));
		FSDirectory destDir = FSDirectory.open(new File(destIndexPath));
		
		IndexReader reader = DirectoryReader.open(srcDir);
		
		Version version = Version.LUCENE_48;
		IndexWriterConfig conf = new IndexWriterConfig(version, new StandardTextAnalyzer(version));
		
		Set<String> crids = getCreatorIds(reader);
		
		String[] crida = crids.toArray(new String[crids.size()]);
		
		int numThreads = 24;
		ExecutorService executor = Executors.newFixedThreadPool(numThreads);
		
		int numCrids = crids.size();
		int batchSize = numCrids / numThreads;
		int remainder = numCrids % numThreads;
		
		System.out.println("Inserting work/creator blocks using " + numThreads + " threads...");
		try (IndexWriter writer = new IndexWriter(destDir, conf)){
			for (int i = 0; i < numThreads; i++) {
				String[] cridRange;
				if (i == numThreads - 1) {
					cridRange = Arrays.copyOfRange(crida, i*batchSize, ((i+1)*batchSize - 1) + remainder);
				} else {
					cridRange = Arrays.copyOfRange(crida, i*batchSize, ((i+1)*batchSize - 1));
				}
				String id = "" + ((char)('A' + i));
				Runnable indexer = new IndexRunnable(id , reader, writer, new HashSet<String>(Arrays.asList(cridRange)));
				executor.execute(indexer);
			}
			executor.shutdown();
			executor.awaitTermination(2, TimeUnit.HOURS);
		} catch (Exception e) {
			executor.shutdownNow();
			throw new RuntimeException(e);
		} finally {
			reader.close();
			srcDir.close();
			destDir.close();
		}
		
		System.out.println("Done!");
	}

	public static class IndexRunnable implements Runnable {
		private String id;
		private IndexReader reader;
		private IndexWriter writer;
		private Set<String> crids;

		public IndexRunnable(String id, IndexReader reader, IndexWriter writer, Set<String>
crids) {
			this.id = id;
			this.reader = reader;
			this.writer = writer;
			this.crids = crids;
		}
		
		@Override
		public void run() {
			IndexSearcher searcher = new IndexSearcher(reader);

			try {
				int count = 0;
				for (String crid : crids) {
					List<Document> docs = new ArrayList<>();
					
					BooleanQuery abidQuery = new BooleanQuery();
					abidQuery.add(new TermQuery(new Term("ABID", crid)), Occur.MUST);
					abidQuery.add(new TermQuery(new Term("AGPR", "true")), Occur.MUST);
					
					TermQuery cridQuery = new TermQuery(new Term("CRID", crid));
					
					TopDocs creatorDocs = searcher.search(abidQuery, Integer.MAX_VALUE);
					TopDocs workDocs = searcher.search(cridQuery, Integer.MAX_VALUE);
					
					for (int i = 0; i < workDocs.scoreDocs.length; i++) {
						docs.add(reader.document(workDocs.scoreDocs[i].doc));
					}
					
					if (creatorDocs.scoreDocs.length > 0) {
						docs.add(reader.document(creatorDocs.scoreDocs[0].doc));
					}
					
					writer.addDocuments(docs);
					if (++count % 100 == 0) {
						System.out.println(id + " = " + count);
						writer.commit();
					}
				}
			} catch (IOException e) {
				throw new RuntimeException(e);
			}
		}
	}

I then attempt to perform a block join query as follows:

	private void runToChildBlockJoinQuery(String indexPath) throws IOException {
		FSDirectory dir = FSDirectory.open(new File(indexPath));
		IndexReader reader = DirectoryReader.open(dir);
		IndexSearcher searcher = new IndexSearcher(reader);
		
		TermQuery parentQuery = new TermQuery(new Term("NT", "american"));
		TermQuery parentFilterQuery = new TermQuery(new Term("AGTY", "np"));
		Filter parentFilter = new CachingWrapperFilter(new QueryWrapperFilter(parentFilterQuery));
		
		ToChildBlockJoinQuery tcbjq = new ToChildBlockJoinQuery(parentQuery, parentFilter, true);
		
		TopDocs worksDocs = searcher.search(tcbjq, 20);
		
		displayWorks(reader, searcher, worksDocs);
	}

and I get the following exception:

Exception in thread "main" java.lang.IllegalStateException: parentFilter must return FixedBitSet;
got org.apache.lucene.util.WAH8DocIdSet@34e671de
	at org.apache.lucene.search.join.ToChildBlockJoinQuery$ToChildBlockJoinWeight.scorer(ToChildBlockJoinQuery.java:148)
	at org.apache.lucene.search.Weight.bulkScorer(Weight.java:131)
	at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:618)
	at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:491)
	at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:448)
	at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:281)
	at org.apache.lucene.search.IndexSearcher.search(IndexSearcher.java:269)
	at BlockJoinQueryTester.runToChildBlockJoinQuery(BlockJoinQueryTester.java:73)
	at BlockJoinQueryTester.main(BlockJoinQueryTester.java:40)

I don't understand what I'm doing wrong and what a "FixedBitSet" is and why I don't get one
out of my filter.  Is FixedBitSet a special kind of OpenBitSet and what does "fixed" mean
in this context?  Thanks for any help.

Jim

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Mime
View raw message