Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 6140D200B66 for ; Wed, 3 Aug 2016 20:14:42 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 5FFCE160A5D; Wed, 3 Aug 2016 18:14:42 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id A8F58160A8C for ; Wed, 3 Aug 2016 20:14:41 +0200 (CEST) Received: (qmail 22816 invoked by uid 500); 3 Aug 2016 18:14:40 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 22700 invoked by uid 99); 3 Aug 2016 18:14:40 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Wed, 03 Aug 2016 18:14:40 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 60AF1E3839; Wed, 3 Aug 2016 18:14:40 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: jbernste@apache.org To: commits@lucene.apache.org Date: Wed, 03 Aug 2016 18:14:40 -0000 Message-Id: <80d04fcb1a0f442799ff042e9179139a@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/3] lucene-solr:branch_6x: SOLR-9252: Feature selection and logistic regression on text archived-at: Wed, 03 Aug 2016 18:14:42 -0000 Repository: lucene-solr Updated Branches: refs/heads/branch_6x 33197ec98 -> 728b4fbcd http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e38d6d53/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java ---------------------------------------------------------------------- diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java index 63baa01..4ddf4ce 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExpessionTest.java @@ -62,6 +62,8 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase { .withFunctionName("avg", MeanMetric.class) .withFunctionName("daemon", DaemonStream.class) .withFunctionName("topic", TopicStream.class) + .withFunctionName("tlogit", TextLogitStream.class) + .withFunctionName("featuresSelection", FeaturesSelectionStream.class) ; } @@ -138,7 +140,6 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase { assertTrue(expressionString.contains("checkpointEvery=1000")); } - @Test public void testStatsStream() throws Exception { @@ -342,6 +343,40 @@ public class StreamExpressionToExpessionTest extends LuceneTestCase { assertTrue(firstExpressionString.contains("q=\"presentTitles:\\\"chief, executive officer\\\" AND age:[36 TO *]\"")); assertTrue(secondExpressionString.contains("q=\"presentTitles:\\\"chief, executive officer\\\" AND age:[36 TO *]\"")); } + + @Test + public void testFeaturesSelectionStream() throws Exception { + String expr = "featuresSelection(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4, positiveLabel=2)"; + FeaturesSelectionStream stream = new FeaturesSelectionStream(StreamExpressionParser.parse(expr), factory); + String expressionString = stream.toExpression(factory).toString(); + assertTrue(expressionString.contains("q=\"*:*\"")); + assertTrue(expressionString.contains("featureSet=first")); + assertTrue(expressionString.contains("field=tv_text")); + assertTrue(expressionString.contains("outcome=out_i")); + assertTrue(expressionString.contains("numTerms=4")); + assertTrue(expressionString.contains("positiveLabel=2")); + } + + @Test + public void testTextLogitStreamWithFeaturesSelection() throws Exception { + String expr = "tlogit(" + + "collection1, " + + "q=\"*:*\", " + + "name=\"model\", " + + "featuresSelection(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4), " + + "field=\"tv_text\", " + + "outcome=\"out_i\", " + + "maxIterations=100)"; + TextLogitStream logitStream = new TextLogitStream(StreamExpressionParser.parse(expr), factory); + String expressionString = logitStream.toExpression(factory).toString(); + assertTrue(expressionString.contains("q=\"*:*\"")); + assertTrue(expressionString.contains("name=model")); + assertFalse(expressionString.contains("terms=")); + assertTrue(expressionString.contains("featuresSelection(")); + assertTrue(expressionString.contains("field=tv_text")); + assertTrue(expressionString.contains("outcome=out_i")); + assertTrue(expressionString.contains("maxIterations=100")); + } @Test public void testCountMetric() throws Exception { http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/e38d6d53/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExplanationTest.java ---------------------------------------------------------------------- diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExplanationTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExplanationTest.java index f8765c9..91cab3d 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExplanationTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/io/stream/StreamExpressionToExplanationTest.java @@ -17,7 +17,6 @@ package org.apache.solr.client.solrj.io.stream; import junit.framework.Assert; - import org.apache.lucene.util.LuceneTestCase; import org.apache.solr.client.solrj.io.ops.GroupOperation; import org.apache.solr.client.solrj.io.stream.expr.Explanation;