lucene-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From GitBox <...@apache.org>
Subject [GitHub] [lucene-solr] mkhludnev commented on a change in pull request #1034: SOLR-13863: payload query function now handles string encoded payload field (delimited_payloads_string)
Date Thu, 28 Nov 2019 15:27:15 GMT
mkhludnev commented on a change in pull request #1034: SOLR-13863: payload query function now
handles string encoded payload field (delimited_payloads_string)
URL: https://github.com/apache/lucene-solr/pull/1034#discussion_r351837061
 
 

 ##########
 File path: solr/core/src/java/org/apache/solr/search/StringPayloadValueSource.java
 ##########
 @@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.search;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.queries.function.FunctionValues;
+import org.apache.lucene.queries.function.ValueSource;
+import org.apache.lucene.queries.function.docvalues.StrDocValues;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.FieldComparator;
+import org.apache.lucene.search.FieldComparatorSource;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.SimpleFieldComparator;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.util.BytesRef;
+
+public class StringPayloadValueSource extends ValueSource {
+  protected final String field;
+  protected final String val;
+  protected final String indexedField;
+  protected final BytesRef indexedBytes;
+  protected final ValueSource defaultValueSource;
+
+  /**
+   * Creates a value source that reads a string payload from the postings of a single term.
+   *
+   * @param field              field name as shown in {@link #description()}
+   * @param val                term text as shown in {@link #description()}
+   * @param indexedField       name of the field whose terms are looked up
+   * @param indexedBytes       bytes of the term to seek within {@code indexedField}
+   * @param defaultValueSource source consulted when a document yields no payload for the term
+   */
+  public StringPayloadValueSource(String field, String val, String indexedField, BytesRef indexedBytes, ValueSource defaultValueSource) {
+    // Display-only values (used by description()).
+    this.val = val;
+    this.field = field;
+    // Values that drive the actual term lookup and the fallback.
+    this.indexedBytes = indexedBytes;
+    this.indexedField = indexedField;
+    this.defaultValueSource = defaultValueSource;
+  }
+
+  /**
+   * Returns a new {@code StringPayloadValueSourceSortField} built with the given flag.
+   *
+   * @param reverse passed unchanged to the sort field's constructor
+   * @return the newly created sort field
+   */
+  public SortField getSortField(boolean reverse) {
+    final SortField sortField = new StringPayloadValueSourceSortField(reverse);
+    return sortField;
+  }
+
+  /**
+   * Builds per-segment values exposing, for each document, the payload attached to the term
+   * {@code indexedBytes} in {@code indexedField}, decoded as a string.
+   *
+   * <p>For a document that contains the term, the first non-null payload found among its
+   * positions is returned. If the term is absent from the document, or none of its positions
+   * carries a payload, the value is taken from {@code defaultValueSource} instead.
+   *
+   * @param context       context handed through to {@code defaultValueSource}
+   * @param readerContext segment whose terms and postings are read
+   * @return string values backed by the term's postings in this segment
+   * @throws IOException if the terms or postings cannot be read
+   */
+  @Override
+  public FunctionValues getValues(Map context, LeafReaderContext readerContext) throws IOException {
+
+    final Terms terms = readerContext.reader().terms(indexedField);
+
+    FunctionValues defaultValues = defaultValueSource.getValues(context, readerContext);
+
+    // copied the bulk of this from TFValueSource - TODO: this is a very repeated pattern - base-class this advance logic stuff?
+    return new StrDocValues(this) {
+      PostingsEnum docs;
+      // doc the postings iterator is currently positioned on (-1 right after reset())
+      int atDoc;
+      // last doc passed to strVal(); its answer is cached in docValue
+      int lastDocRequested = -1;
+      String docValue = null;
+
+      {
+        reset();
+      }
+
+      /** (Re)positions {@code docs} before the first posting of the term, or installs an empty iterator. */
+      public void reset() throws IOException {
+        // no one should call us for deleted docs?
+
+        if (terms != null) {
+          final TermsEnum termsEnum = terms.iterator();
+          if (termsEnum.seekExact(indexedBytes)) {
+            docs = termsEnum.postings(null, PostingsEnum.ALL);
+          } else {
+            docs = null;
+          }
+        } else {
+          docs = null;
+        }
+
+        if (docs == null) {
+          // dummy PostingsEnum so floatVal() can work
+          // when would this be called?  if field/val did not match?  this is called for every doc?  create once and cache?
+          docs = new PostingsEnum() {
+            @Override
+            public int freq() {
+              return 0;
+            }
+
+            @Override
+            public int nextPosition() throws IOException {
+              return -1;
+            }
+
+            @Override
+            public int startOffset() throws IOException {
+              return -1;
+            }
+
+            @Override
+            public int endOffset() throws IOException {
+              return -1;
+            }
+
+            @Override
+            public BytesRef getPayload() throws IOException {
+              return null;
+            }
+
+            @Override
+            public int docID() {
+              return DocIdSetIterator.NO_MORE_DOCS;
+            }
+
+            @Override
+            public int nextDoc() {
+              return DocIdSetIterator.NO_MORE_DOCS;
+            }
+
+            @Override
+            public int advance(int target) {
+              return DocIdSetIterator.NO_MORE_DOCS;
+            }
+
+            @Override
+            public long cost() {
+              return 0;
+            }
+          };
+        }
+        atDoc = -1;
+      }
+
+      @Override
+      public String strVal(int doc) {
+        try {
+          if (doc < lastDocRequested) {
+            // out-of-order access.... reset
+            reset();
+          } else if (doc == lastDocRequested) {
+            // repeated request for the same doc: answer from the cache
+            return docValue;
+          }
+
+          lastDocRequested = doc;
+
+          if (atDoc < doc) {
+            atDoc = docs.advance(doc);
+          }
+
+          if (atDoc > doc) {
+            // term doesn't match this document... either because we hit the
+            // end, or because the next doc is after this doc.
+            docValue = defaultValues.strVal(doc);
+            return docValue;
+          }
+
+          // a match!
+          int freq = docs.freq();
+          for (int i = 0; i < freq; i++) {
+            docs.nextPosition();
+            BytesRef payload = docs.getPayload();
+            if (payload != null) {
+              // Cache the decoded payload: lastDocRequested already points at this doc, so
+              // without this a repeated strVal(doc) would return the previous doc's value.
+              docValue = bytesRefToString(payload);
+              return docValue;
+            }
+          }
+          // term present but no position carried a payload
+          docValue = defaultValues.strVal(doc);
+          return docValue;
+        } catch (IOException e) {
+          throw new RuntimeException("caught exception in function " + description() + " : doc=" + doc, e);
+        }
+      }
+    };
+  }
+
+  /**
+   * Decodes the bytes referenced by {@code payload} into a string.
+   *
+   * <p>The bytes are decoded explicitly as UTF-8 rather than with the JVM's platform default
+   * charset, so the result does not vary between machines.
+   * NOTE(review): assumes payloads were written as UTF-8 at index time — confirm against the
+   * encoder configured for the field type.
+   *
+   * @param payload the payload slice to decode; must not be null
+   * @return the decoded text of {@code payload.length} bytes starting at {@code payload.offset}
+   */
+  private String bytesRefToString(BytesRef payload) {
+    // Decode the slice in place; no intermediate array copy is needed.
+    return new String(payload.bytes, payload.offset, payload.length, StandardCharsets.UTF_8);
+  }
+
+  // TODO: should this be formalized at the ValueSource level?  Seems to be the convention
+  /** Returns the function name that {@link #description()} uses as its prefix. */
+  public String name() {
+    final String functionName = "spayload";
+    return functionName;
+  }
+
+  /**
+   * Renders this source as {@code name(field,val,default)}, where the last component is the
+   * string form of {@code defaultValueSource}.
+   */
+  @Override
+  public String description() {
+    final StringBuilder text = new StringBuilder();
+    text.append(name());
+    text.append('(');
+    text.append(field).append(',');
+    text.append(val).append(',');
+    text.append(defaultValueSource.toString());
+    text.append(')');
+    return text.toString();
+  }
+
+  /**
+   * Two sources are equal when they are of the same class and agree on {@code indexedField},
+   * {@code indexedBytes} (which may be null on both sides) and {@code defaultValueSource}.
+   */
+  @Override
+  public boolean equals(Object o) {
+    if (o == this) {
+      return true;
+    }
+    if (o == null || o.getClass() != getClass()) {
+      return false;
+    }
+
+    final StringPayloadValueSource other = (StringPayloadValueSource) o;
+
+    // Evaluated left to right: field name, then term bytes (null-safe), then the default source.
+    return indexedField.equals(other.indexedField)
+        && (indexedBytes == null ? other.indexedBytes == null : indexedBytes.equals(other.indexedBytes))
+        && defaultValueSource.equals(other.defaultValueSource);
+  }
+
+  /**
+   * Combines the hashes of {@code indexedField}, {@code indexedBytes} (0 when null) and
+   * {@code defaultValueSource} with the multiplier 31, matching the fields used by
+   * {@link #equals(Object)}.
+   */
+  @Override
+  public int hashCode() {
+    final int fieldHash = indexedField.hashCode();
+    final int bytesHash = (indexedBytes == null) ? 0 : indexedBytes.hashCode();
+    final int defaultHash = defaultValueSource.hashCode();
+    return 31 * (31 * fieldHash + bytesHash) + defaultHash;
+  }
+
+  class StringPayloadValueSourceSortField extends SortField {
 
 Review comment:
   This doesn't seem to be anything unique; can't it be reused from another string ValueSource?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@lucene.apache.org
For additional commands, e-mail: issues-help@lucene.apache.org


Mime
View raw message