lucene-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From sar...@apache.org
Subject lucene-solr:branch_6_0: SOLR-9058: Makes HashJoinStream and OuterHashJoinStream support different field names in the incoming streams, eg. fieldA=fieldB
Date Fri, 20 May 2016 21:56:07 GMT
Repository: lucene-solr
Updated Branches:
  refs/heads/branch_6_0 93a33e089 -> fbf7d5446


SOLR-9058: Makes HashJoinStream and OuterHashJoinStream support different field names in the
incoming streams, eg. fieldA=fieldB


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/fbf7d544
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/fbf7d544
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/fbf7d544

Branch: refs/heads/branch_6_0
Commit: fbf7d5446d3cdfda0c3906c274ac2c444b419a82
Parents: 93a33e0
Author: Dennis Gove <dpgove@gmail.com>
Authored: Thu May 5 20:30:39 2016 -0400
Committer: Steve Rowe <sarowe@apache.org>
Committed: Fri May 20 17:55:28 2016 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |  3 ++
 .../client/solrj/io/stream/HashJoinStream.java  | 48 ++++++++++++++++----
 .../solrj/io/stream/OuterHashJoinStream.java    | 18 ++++++--
 .../solrj/io/stream/StreamExpressionTest.java   | 24 +++++++++-
 4 files changed, 79 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbf7d544/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index dbadf9a..7da8ac8 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -69,6 +69,9 @@ Bug Fixes
 * SOLR-9036: Solr slave is doing full replication (entire index) of index after master restart.
   (Lior Sapir, Mark Miller, shalin)
 
+* SOLR-9058: Makes HashJoinStream and OuterHashJoinStream support different field names in the
+  incoming streams, eg. fieldA=fieldB. (Dennis Gove, Stephan Osthold)
+
 Other Changes
 ----------------------
 * SOLR-7516: Improve javadocs for JavaBinCodec, ObjectResolver and enforce the single-usage policy.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbf7d544/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/HashJoinStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/HashJoinStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/HashJoinStream.java
index 717e8e5..6353367 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/HashJoinStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/HashJoinStream.java
@@ -44,7 +44,8 @@ public class HashJoinStream extends TupleStream implements Expressible {
 
   protected TupleStream hashStream;
   protected TupleStream fullStream;
-  protected List<String> hashOn;
+  protected List<String> leftHashOn;
+  protected List<String> rightHashOn;
   protected HashMap<Integer, List<Tuple>> hashedTuples;
   
   protected Tuple workingFullTuple = null;
@@ -94,8 +95,25 @@ public class HashJoinStream extends TupleStream implements Expressible
{
   private void init(TupleStream fullStream, TupleStream hashStream, List<String> hashOn)
throws IOException {
     this.fullStream = fullStream;
     this.hashStream = hashStream;
-    this.hashOn = hashOn;
     this.hashedTuples = new HashMap<>();
+    this.leftHashOn = new ArrayList<>();
+    this.rightHashOn = new ArrayList<>();
+    
+    for(String hasher : hashOn){
+      String[] parts = hasher.split("=");
+      if(1 == parts.length){
+        String field = parts[0].trim();
+        leftHashOn.add(field);
+        rightHashOn.add(field);
+      }
+      else if(2 == parts.length){
+        leftHashOn.add(parts[0].trim());
+        rightHashOn.add(parts[1].trim());
+      }
+      else{
+        throw new IOException(String.format(Locale.ROOT,"Invalid expression %s - invalid
'on' parameter - expecting 1 or more instances if 'field' or 'field=hashedField' but found
'%s'",hasher));
+      }
+    }    
   }
   
   @Override
@@ -114,12 +132,24 @@ public class HashJoinStream extends TupleStream implements Expressible
{
     
     // on
     StringBuilder sb = new StringBuilder();
-    for(String part : hashOn){
+    for(int idx = 0; idx < leftHashOn.size(); ++idx){
       if(sb.length() > 0){ sb.append(","); }
-      sb.append(part);
+      
+      // we know that left and right hashOns are the same size
+      String left = leftHashOn.get(idx);
+      String right = rightHashOn.get(idx);
+      
+      if(left.equals(right)){ 
+        sb.append(left); 
+      }
+      else{
+        sb.append(left);
+        sb.append("=");
+        sb.append(right);
+      }
     }
-    expression.addParameter(new StreamExpressionNamedParameter("on",sb.toString()));
     
+    expression.addParameter(new StreamExpressionNamedParameter("on",sb.toString()));
     return expression;   
   }
 
@@ -141,7 +171,7 @@ public class HashJoinStream extends TupleStream implements Expressible
{
     
     Tuple tuple = hashStream.read();
     while(!tuple.EOF){
-      Integer hash = calculateHash(tuple);
+      Integer hash = calculateHash(tuple, rightHashOn);
       if(null != hash){
         if(hashedTuples.containsKey(hash)){
           hashedTuples.get(hash).add(tuple);
@@ -156,7 +186,7 @@ public class HashJoinStream extends TupleStream implements Expressible
{
     }
   }
   
-  protected Integer calculateHash(Tuple tuple){
+  protected Integer calculateHash(Tuple tuple, List<String> hashOn){
     StringBuilder sb = new StringBuilder();
     for(String part : hashOn){
       Object obj = tuple.get(part);
@@ -164,7 +194,7 @@ public class HashJoinStream extends TupleStream implements Expressible
{
         return null;
       }
       sb.append(obj.toString());
-      sb.append("::"); // this is here to seperate fields
+      sb.append("::"); // this is here to separate fields
     }
     
     return sb.toString().hashCode();
@@ -188,7 +218,7 @@ public class HashJoinStream extends TupleStream implements Expressible
{
       
       // If fullTuple doesn't have a valid hash or if there is no doc to 
       // join with then retry loop - keep going until we find one
-      Integer fullHash = calculateHash(fullTuple);
+      Integer fullHash = calculateHash(fullTuple, leftHashOn);
       if(null == fullHash || !hashedTuples.containsKey(fullHash)){
         continue findNextWorkingFullTuple;
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbf7d544/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/OuterHashJoinStream.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/OuterHashJoinStream.java b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/OuterHashJoinStream.java
index 23a99a5..9e82334 100644
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/OuterHashJoinStream.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/io/stream/OuterHashJoinStream.java
@@ -65,9 +65,21 @@ public class OuterHashJoinStream extends HashJoinStream implements Expressible
{
     
     // on
     StringBuilder sb = new StringBuilder();
-    for(String part : hashOn){
+    for(int idx = 0; idx < leftHashOn.size(); ++idx){
       if(sb.length() > 0){ sb.append(","); }
-      sb.append(part);
+      
+      // we know that left and right hashOns are the same size
+      String left = leftHashOn.get(idx);
+      String right = rightHashOn.get(idx);
+      
+      if(left.equals(right)){ 
+        sb.append(left); 
+      }
+      else{
+        sb.append(left);
+        sb.append("=");
+        sb.append(right);
+      }
     }
     expression.addParameter(new StreamExpressionNamedParameter("on",sb.toString()));
     
@@ -87,7 +99,7 @@ public class OuterHashJoinStream extends HashJoinStream implements Expressible
{
       // If fullTuple doesn't have a valid hash or the hash cannot be found in the hashedTuples
then
       // return the tuple from fullStream.
       // This is an outer join so there is no requirement there be a matching value in the
hashed stream
-      Integer fullHash = calculateHash(fullTuple);
+      Integer fullHash = calculateHash(fullTuple, leftHashOn);
       if(null == fullHash || !hashedTuples.containsKey(fullHash)){
         return fullTuple.clone();
       }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/fbf7d544/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
----------------------------------------------------------------------
diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
index e7f57c1..7a8b5e7 100644
--- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
+++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionTest.java
@@ -1430,7 +1430,17 @@ public class StreamExpressionTest extends AbstractFullDistribZkTestBase
{
     stream = new HashJoinStream(expression, factory);
     tuples = getTuples(stream);    
     assert(tuples.size() == 0);
-    
+
+    // Basic test with "on" mapping
+    expression = StreamExpressionParser.parse("hashJoin("
+        + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join3_i,ident_s\", sort=\"join1_i
asc, join3_i asc, id asc\"),"
+        + "hashed=search(collection1, q=\"side_s:right\", fl=\"join1_i,join3_i,ident_s\",
sort=\"join1_i asc, join3_i asc\"),"
+        + "on=\"join1_i=join3_i\")");
+    stream = new HashJoinStream(expression, factory);
+    tuples = getTuples(stream);
+    assertEquals(17, tuples.size());
+    assertOrder(tuples, 1, 1, 2, 2, 15, 15, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 7);
+
     del("*:*");
     commit();
   }
@@ -1493,7 +1503,17 @@ public class StreamExpressionTest extends AbstractFullDistribZkTestBase
{
     tuples = getTuples(stream);    
     assert(tuples.size() == 8);
     assertOrder(tuples, 1,15,2,3,4,5,6,7);
-        
+
+    // Basic test
+    expression = StreamExpressionParser.parse("outerHashJoin("
+        + "search(collection1, q=\"side_s:left\", fl=\"id,join1_i,join2_s,ident_s\", sort=\"join1_i
asc, join2_s asc, id asc\"),"
+        + "hashed=search(collection1, q=\"side_s:right\", fl=\"join3_i,join2_s,ident_s\",
sort=\"join2_s asc\"),"
+        + "on=\"join1_i=join3_i, join2_s\")");
+    stream = new OuterHashJoinStream(expression, factory);
+    tuples = getTuples(stream);
+    assert(tuples.size() == 10);
+    assertOrder(tuples, 1,1,15,15,2,3,4,5,6,7);
+
     del("*:*");
     commit();
   }


Mime
View raw message