lucene-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From moshebla <...@git.apache.org>
Subject [GitHub] lucene-solr pull request #416: WIP: SOLR-12519
Date Sun, 22 Jul 2018 04:41:56 GMT
Github user moshebla commented on a diff in the pull request:

    https://github.com/apache/lucene-solr/pull/416#discussion_r204227797
  
    --- Diff: solr/core/src/java/org/apache/solr/response/transform/DeeplyNestedChildDocTransformerFactory.java
---
    @@ -0,0 +1,367 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *     http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +package org.apache.solr.response.transform;
    +
    +import java.io.IOException;
    +import java.util.ArrayList;
    +import java.util.List;
    +import java.util.Set;
    +import java.util.regex.Pattern;
    +
    +import org.apache.lucene.document.Document;
    +import org.apache.lucene.index.DocValues;
    +import org.apache.lucene.index.ReaderUtil;
    +import org.apache.lucene.index.SortedDocValues;
    +import org.apache.lucene.search.Query;
    +import org.apache.lucene.search.join.BitSetProducer;
    +import org.apache.lucene.search.join.QueryBitSetProducer;
    +import org.apache.lucene.search.join.ToChildBlockJoinQuery;
    +import org.apache.solr.common.SolrDocument;
    +import org.apache.solr.common.SolrException;
    +import org.apache.solr.common.SolrException.ErrorCode;
    +import org.apache.solr.common.params.SolrParams;
    +import org.apache.solr.common.util.StrUtils;
    +import org.apache.solr.request.SolrQueryRequest;
    +import org.apache.solr.response.DocsStreamer;
    +import org.apache.solr.schema.FieldType;
    +import org.apache.solr.schema.SchemaField;
    +import org.apache.solr.search.DocIterator;
    +import org.apache.solr.search.DocList;
    +import org.apache.solr.search.QParser;
    +import org.apache.solr.search.SolrDocumentFetcher;
    +import org.apache.solr.search.SolrReturnFields;
    +import org.apache.solr.search.SyntaxError;
    +
    +import static org.apache.solr.response.transform.DeeplyNestedChildDocTransformerFactory.PATH_SEP_CHAR;
    +import static org.apache.solr.schema.IndexSchema.NEST_PATH_FIELD_NAME;
    +import static org.apache.solr.schema.IndexSchema.ROOT_FIELD_NAME;
    +
    +/**
    + *
    + * @since solr 4.9
    + *
    + * This transformer returns all descendants of each parent document in a flat list nested
inside the parent document.
    + *
    + *
    + * The "parentFilter" parameter is mandatory.
    + * Optionally you can provide a "childFilter" param to filter out which child documents
should be returned and a
    + * "limit" param which provides an option to specify the number of child documents
    + * to be returned per parent document. By default it's set to 10.
    + *
    + * Examples -
    + * [child parentFilter="fieldName:fieldValue"]
    + * [child parentFilter="fieldName:fieldValue" childFilter="fieldName:fieldValue"]
    + * [child parentFilter="fieldName:fieldValue" childFilter="fieldName:fieldValue" limit=20]
    + */
    +public class DeeplyNestedChildDocTransformerFactory extends TransformerFactory {
    +
    +  public static final String PATH_SEP_CHAR = "/";
    +  public static final String NUM_SEP_CHAR = "#";
    +
    +  @Override
    +  public DocTransformer create(String field, SolrParams params, SolrQueryRequest req)
{
    +    SchemaField uniqueKeyField = req.getSchema().getUniqueKeyField();
    +    if(uniqueKeyField == null) {
    +      throw new SolrException( ErrorCode.BAD_REQUEST,
    +          " ChildDocTransformer requires the schema to have a uniqueKeyField." );
    +    }
    +
    +    String childFilter = params.get( "childFilter" );
    +    String nestPath = null;
    +    int limit = params.getInt( "limit", 10 );
    +
    +    Query childFilterQuery = null;
    +    List<String> split = null;
    +    List<String> splitPath = null;
    +    if(childFilter != null) {
    +      split = StrUtils.splitSmart(childFilter, ':');
    +      splitPath = StrUtils.splitSmart(split.get(0), PATH_SEP_CHAR.charAt(0));
    +      try {
    +        if (childFilter.contains(PATH_SEP_CHAR)) {
    +          nestPath = String.join(PATH_SEP_CHAR, splitPath.subList(0, splitPath.size()
- 1));
    +          // TODO: filter out parents who's childDocs don't match the original childFilter
    +          childFilter = "(" + splitPath.get(splitPath.size() - 1) + ":\"" + split.get(split.size()
- 1) + "\" AND " + NEST_PATH_FIELD_NAME + ":\"" + nestPath + "/\")";
    +        }
    +        childFilterQuery = QParser.getParser(childFilter, req).getQuery();
    +      } catch (SyntaxError syntaxError) {
    +        throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct child
filter query" );
    +      }
    +    }
    +
    +    String parentFilter = params.get( "parentFilter" );
    +
    +    BitSetProducer parentsFilter = null;
    +
    +    if(parentFilter != null) {
    +      try {
    +        Query parentFilterQuery = QParser.getParser( parentFilter, req).getQuery();
    +        //TODO shouldn't we try to use the Solr filter cache, and then ideally implement
    +        //  BitSetProducer over that?
    +        // DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery);
    +        // then return BitSetProducer with custom BitSet impl accessing the docSet
    +        parentsFilter = new QueryBitSetProducer(parentFilterQuery);
    +      } catch (SyntaxError syntaxError) {
    +        throw new SolrException( ErrorCode.BAD_REQUEST, "Failed to create correct parent
filter query" );
    +      }
    +    } else {
    +      String sRootFilter = "{!frange l=1 u=1}strdist(" + req.getSchema().getUniqueKeyField().getName()
+ "," + ROOT_FIELD_NAME + ",edit)";
    +      try {
    +        Query parentFilterQuery = QParser.getParser(sRootFilter, req).getQuery();
    +        //TODO shouldn't we try to use the Solr filter cache, and then ideally implement
    +        //  BitSetProducer over that?
    +        // DocSet parentDocSet = req.getSearcher().getDocSet(parentFilterQuery);
    +        // then return BitSetProducer with custom BitSet impl accessing the docSet
    +        parentsFilter = new QueryBitSetProducer(parentFilterQuery);
    +      } catch (SyntaxError syntaxError) {
    +        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Failed to create
correct parent filter query" );
    +      }
    +    }
    +
    +    if(childFilterQuery == null) {
    +      return new DeeplyNestedChildDocTransformer(field, parentsFilter, req, limit);
    +    }
    +    return new DeeplyNestedFilterChildDocTransformer(field, parentsFilter, req, childFilterQuery,
nestPath!=null? generatePattern(splitPath): null, limit);
    +  }
    +
    +  private Pattern generatePattern(List<String> pathList) {
    +    if(pathList.size() <= 2) {
    +      return Pattern.compile(pathList.get(0) + NUM_SEP_CHAR + "\\d");
    +    }
    +    return Pattern.compile(String.join(NUM_SEP_CHAR + "\\d" + PATH_SEP_CHAR, pathList.subList(0,
pathList.size() - 1)) + NUM_SEP_CHAR + "\\d");
    +  }
    +}
    +
    +class DeeplyNestedFilterChildDocTransformer extends DeeplyNestedChildDocTransformerBase
{
    +
    +  private Query childFilterQuery;
    +  private Pattern nestPathMatcher;
    +
    +  public DeeplyNestedFilterChildDocTransformer( String name, final BitSetProducer parentsFilter,
    +                              final SolrQueryRequest req, final Query childFilterQuery,
Pattern pathPattern, int limit) {
    +    super(name, parentsFilter, req, limit);
    +    this.childFilterQuery = childFilterQuery;
    +    this.nestPathMatcher = pathPattern;
    +  }
    +
    +  @Override
    +  public void transform(SolrDocument rootDoc, int docid) {
    --- End diff --
    
    Sure thing, any help would be welcome.
    This is a pretty rough draft, so I will try to work on this ASAP before I add more logic
to the transformer.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: dev-help@lucene.apache.org


Mime
View raw message