Return-Path:
X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io
Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io
Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183])
by cust-asf2.ponee.io (Postfix) with ESMTP id 2151D200BB8
for ; Sat, 8 Oct 2016 00:06:16 +0200 (CEST)
Received: by cust-asf.ponee.io (Postfix)
id 1FD73160AE8; Fri, 7 Oct 2016 22:06:16 +0000 (UTC)
Delivered-To: archive-asf-public@cust-asf.ponee.io
Received: from mail.apache.org (hermes.apache.org [140.211.11.3])
by cust-asf.ponee.io (Postfix) with SMTP id 68B3C160AF4
for ; Sat, 8 Oct 2016 00:06:13 +0200 (CEST)
Received: (qmail 67017 invoked by uid 500); 7 Oct 2016 22:06:11 -0000
Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm
Precedence: bulk
List-Help:
List-Unsubscribe:
List-Post:
List-Id:
Reply-To: dev@lucene.apache.org
Delivered-To: mailing list commits@lucene.apache.org
Received: (qmail 66519 invoked by uid 99); 7 Oct 2016 22:06:11 -0000
Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23)
by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Oct 2016 22:06:11 +0000
Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33)
id 5887FE09AC; Fri, 7 Oct 2016 22:06:11 +0000 (UTC)
Content-Type: text/plain; charset="us-ascii"
MIME-Version: 1.0
Content-Transfer-Encoding: 7bit
From: cpoerschke@apache.org
To: commits@lucene.apache.org
Date: Fri, 07 Oct 2016 22:06:14 -0000
Message-Id: <1bb2a0631d254ae782e92ba18b05a387@git.apache.org>
In-Reply-To: <3762255b53494eee82c646be86b5796b@git.apache.org>
References: <3762255b53494eee82c646be86b5796b@git.apache.org>
X-Mailer: ASF-Git Admin Mailer
Subject: [4/8] lucene-solr:jira/solr-8542: SOLR-8542: Added Solr Learning to
Rank (LTR) plugin for reranking results with machine learning models.
(Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando,
Naveen Santhapuri, Alessandro Benedetti, David Groh
archived-at: Fri, 07 Oct 2016 22:06:16 -0000
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
new file mode 100644
index 0000000..c4addfb
--- /dev/null
+++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.ltr.store.rest;
+
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrException.ErrorCode;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.core.SolrResourceLoader;
+import org.apache.solr.ltr.feature.Feature;
+import org.apache.solr.ltr.model.LTRScoringModel;
+import org.apache.solr.ltr.model.ModelException;
+import org.apache.solr.ltr.norm.IdentityNormalizer;
+import org.apache.solr.ltr.norm.Normalizer;
+import org.apache.solr.ltr.store.FeatureStore;
+import org.apache.solr.ltr.store.ModelStore;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.rest.BaseSolrResource;
+import org.apache.solr.rest.ManagedResource;
+import org.apache.solr.rest.ManagedResourceObserver;
+import org.apache.solr.rest.ManagedResourceStorage.StorageIO;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Managed resource for storing a model
+ */
+public class ManagedModelStore extends ManagedResource implements
+ ManagedResource.ChildResourceSupport {
+
+ public static void registerManagedModelStore(SolrResourceLoader solrResourceLoader,
+ ManagedResourceObserver managedResourceObserver) {
+ solrResourceLoader.getManagedResourceRegistry().registerManagedResource(
+ REST_END_POINT,
+ ManagedModelStore.class,
+ managedResourceObserver);
+ }
+
+ public static ManagedModelStore getManagedModelStore(SolrCore core) {
+ return (ManagedModelStore) core.getRestManager()
+ .getManagedResource(REST_END_POINT);
+ }
+
+ /** the model store rest endpoint **/
+ public static final String REST_END_POINT = "/schema/model-store";
+
+ /** name of the attribute containing the features used by the model **/
+ private static final Object MODEL_FEATURE_LIST = "features";
+
+ /**
+ * Managed model store: the name of the attribute containing all the models of
+ * a model store
+ **/
+ private static final String MODELS_JSON_FIELD = "models";
+
+ /** name of the attribute containing a class **/
+ static final String CLASS_KEY = "class";
+ /** name of the attribute containing the features **/
+ static final String FEATURES_KEY = "features";
+ /** name of the attribute containing a name **/
+ static final String NAME_KEY = "name";
+ /** name of the attribute containing a normalizer **/
+ static final String NORM_KEY = "norm";
+ /** name of the attribute containing parameters **/
+ static final String PARAMS_KEY = "params";
+ /** name of the attribute containing a store **/
+ static final String STORE_KEY = "store";
+
+ private final ModelStore store;
+ private ManagedFeatureStore managedFeatureStore;
+
+ private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+ public ManagedModelStore(String resourceId, SolrResourceLoader loader,
+ StorageIO storageIO) throws SolrException {
+ super(resourceId, loader, storageIO);
+ store = new ModelStore();
+ }
+
+ public void setManagedFeatureStore(ManagedFeatureStore managedFeatureStore) {
+ log.info("INIT model store");
+ this.managedFeatureStore = managedFeatureStore;
+ }
+
+ public ManagedFeatureStore getManagedFeatureStore() {
+ return managedFeatureStore;
+ }
+
+ private Object managedData;
+
+ @SuppressWarnings("unchecked")
+ @Override
+ protected void onManagedDataLoadedFromStorage(NamedList<?> managedInitArgs,
+ Object managedData) throws SolrException {
+ store.clear();
+ // the managed models on disk or in ZooKeeper are loaded lazily, since
+ // the managed features need to be set first (unfortunately managed
+ // resources do not decouple the creation of a managed resource from
+ // the reading of its data from storage), so the raw data is only kept
+ // here for loadStoredModels() to use later
+ this.managedData = managedData;
+
+ }
+
+ public void loadStoredModels() {
+ log.info("------ managed models ~ loading ------");
+
+ if ((managedData != null) && (managedData instanceof List)) {
+ final List
+
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/external_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/external_features.json b/solr/contrib/ltr/src/test-files/featureExamples/external_features.json
new file mode 100644
index 0000000..6c0cfa6
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/external_features.json
@@ -0,0 +1,51 @@
+[ {
+ "name" : "matchedTitle",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : {
+ "q" : "{!terms f=title}${user_query}"
+ }
+}, {
+ "name" : "confidence",
+ "class" : "org.apache.solr.ltr.feature.ValueFeature",
+ "store": "fstore2",
+ "params" : {
+ "value" : "${myconf}"
+ }
+}, {
+ "name":"originalScore",
+ "class":"org.apache.solr.ltr.feature.OriginalScoreFeature",
+ "store": "fstore2",
+ "params":{}
+}, {
+ "name" : "occurrences",
+ "class" : "org.apache.solr.ltr.feature.ValueFeature",
+ "store": "fstore3",
+ "params" : {
+ "value" : "${myOcc}",
+ "required" : false
+ }
+}, {
+ "name":"originalScore",
+ "class":"org.apache.solr.ltr.feature.OriginalScoreFeature",
+ "store": "fstore3",
+ "params":{}
+}, {
+ "name" : "popularity",
+ "class" : "org.apache.solr.ltr.feature.ValueFeature",
+ "store": "fstore4",
+ "params" : {
+ "value" : "${myPop}",
+ "required" : true
+ }
+}, {
+ "name":"originalScore",
+ "class":"org.apache.solr.ltr.feature.OriginalScoreFeature",
+ "store": "fstore4",
+ "params":{}
+}, {
+ "name" : "titlePhraseMatch",
+ "class" : "org.apache.solr.ltr.feature.SolrFeature",
+ "params" : {
+ "q" : "{!field f=title}${user_query}"
+ }
+} ]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json b/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json
new file mode 100644
index 0000000..52bab27
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json
@@ -0,0 +1,18 @@
+[{
+ "name" : "user_device_smartphone",
+ "class":"org.apache.solr.ltr.feature.ValueFeature",
+ "params" : {
+ "value": "${user_device_smartphone}"
+ }
+},
+ {
+ "name" : "user_device_tablet",
+ "class":"org.apache.solr.ltr.feature.ValueFeature",
+ "params" : {
+ "value": "${user_device_tablet}"
+ }
+ }
+
+
+
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json
new file mode 100644
index 0000000..e05542a
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json
@@ -0,0 +1,17 @@
+[
+ {
+ "name": "sampleConstant",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 5
+ }
+ },
+ {
+ "name" : "search_number_of_nights",
+ "class":"org.apache.solr.ltr.feature.ValueFeature",
+ "params" : {
+ "value": "${search_number_of_nights}"
+ }
+ }
+
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json
new file mode 100644
index 0000000..8cc2996
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json
@@ -0,0 +1,51 @@
+[
+ {
+ "name": "title",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 1
+ }
+ },
+ {
+ "name": "description",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 2
+ }
+ },
+ {
+ "name": "keywords",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 2
+ }
+ },
+ {
+ "name": "popularity",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 3
+ }
+ },
+ {
+ "name": "text",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 4
+ }
+ },
+ {
+ "name": "queryIntentPerson",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 5
+ }
+ },
+ {
+ "name": "queryIntentCompany",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 5
+ }
+ }
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json b/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json
new file mode 100644
index 0000000..69aad84
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json
@@ -0,0 +1,51 @@
+[
+ {
+ "name": "constant1",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "store":"test",
+ "params": {
+ "value": 1
+ }
+ },
+ {
+ "name": "constant2",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "store":"test",
+ "params": {
+ "value": 2
+ }
+ },
+ {
+ "name": "constant3",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "store":"test",
+ "params": {
+ "value": 3
+ }
+ },
+ {
+ "name": "constant4",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "store":"test",
+ "params": {
+ "value": 4
+ }
+ },
+ {
+ "name": "constant5",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "store":"test",
+ "params": {
+ "value": 5
+ }
+ },
+ {
+ "name": "pop",
+ "class": "org.apache.solr.ltr.feature.FieldValueFeature",
+ "store":"test",
+ "params": {
+ "field": "popularity"
+ }
+ }
+
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json b/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json
new file mode 100644
index 0000000..13968f9
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json
@@ -0,0 +1,16 @@
+[
+ {
+ "name": "matchedTitle",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": {
+ "q": "{!terms f=title}${user_query}"
+ }
+ },
+ {
+ "name": "popularity",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 3
+ }
+ }
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json b/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json
new file mode 100644
index 0000000..3bc2c77
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json
@@ -0,0 +1,16 @@
+[
+ {
+ "name": "matchedTitle",
+ "class": "org.apache.solr.ltr.feature.SolrFeature",
+ "params": {
+ "q": "{!terms f=title}${user_query}"
+ }
+ },
+ {
+ "name": "constantScoreToForceLambdaMARTScoreAllDocs",
+ "class": "org.apache.solr.ltr.feature.ValueFeature",
+ "params": {
+ "value": 1
+ }
+ }
+]
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/log4j.properties
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/log4j.properties b/solr/contrib/ltr/src/test-files/log4j.properties
new file mode 100644
index 0000000..d86c698
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/log4j.properties
@@ -0,0 +1,32 @@
+# Logging level
+log4j.rootLogger=INFO, CONSOLE
+
+log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
+log4j.appender.CONSOLE.Target=System.err
+log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout
+log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n
+log4j.logger.org.apache.zookeeper=WARN
+log4j.logger.org.apache.hadoop=WARN
+log4j.logger.org.apache.directory=WARN
+log4j.logger.org.apache.solr.hadoop=INFO
+log4j.logger.org.apache.solr.client.solrj.embedded.JettySolrRunner=DEBUG
+org.apache.solr.client.solrj.embedded.JettySolrRunner=DEBUG
+
+#log4j.logger.org.apache.solr.update.processor.LogUpdateProcessor=DEBUG
+#log4j.logger.org.apache.solr.update.processor.DistributedUpdateProcessor=DEBUG
+#log4j.logger.org.apache.solr.update.PeerSync=DEBUG
+#log4j.logger.org.apache.solr.core.CoreContainer=DEBUG
+#log4j.logger.org.apache.solr.cloud.RecoveryStrategy=DEBUG
+#log4j.logger.org.apache.solr.cloud.SyncStrategy=DEBUG
+#log4j.logger.org.apache.solr.handler.admin.CoreAdminHandler=DEBUG
+#log4j.logger.org.apache.solr.cloud.ZkController=DEBUG
+#log4j.logger.org.apache.solr.update.DefaultSolrCoreState=DEBUG
+#log4j.logger.org.apache.solr.common.cloud.ConnectionManager=DEBUG
+#log4j.logger.org.apache.solr.update.UpdateLog=DEBUG
+#log4j.logger.org.apache.solr.cloud.ChaosMonkey=DEBUG
+#log4j.logger.org.apache.solr.update.TransactionLog=DEBUG
+#log4j.logger.org.apache.solr.handler.ReplicationHandler=DEBUG
+#log4j.logger.org.apache.solr.handler.IndexFetcher=DEBUG
+
+#log4j.logger.org.apache.solr.common.cloud.ClusterStateUtil=DEBUG
+#log4j.logger.org.apache.solr.cloud.OverseerAutoReplicaFailoverThread=DEBUG
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/external_model.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/external_model.json b/solr/contrib/ltr/src/test-files/modelExamples/external_model.json
new file mode 100644
index 0000000..c7f35c0
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/external_model.json
@@ -0,0 +1,12 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"externalmodel",
+ "features":[
+ { "name": "matchedTitle"}
+ ],
+ "params":{
+ "weights": {
+ "matchedTitle": 0.999
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json b/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json
new file mode 100644
index 0000000..227a943
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json
@@ -0,0 +1,13 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"externalmodelstore",
+ "store": "fstore2",
+ "features":[
+ { "name": "confidence"}
+ ],
+ "params":{
+ "weights": {
+ "confidence": 0.999
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json b/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json
new file mode 100644
index 0000000..c0fa77a
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json
@@ -0,0 +1,20 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"fqmodel",
+ "features":[
+ {
+ "name":"matchedTitle",
+ "norm": {
+ "class":"org.apache.solr.ltr.norm.MinMaxNormalizer",
+ "params":{ "min":"0.0f", "max":"10.0f" }
+ }
+ },
+ { "name":"popularity"}
+ ],
+ "params":{
+ "weights": {
+ "matchedTitle": 0.5,
+ "popularity": 0.5
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json
new file mode 100644
index 0000000..782e641
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json
@@ -0,0 +1,38 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "matchedTitle",
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "-100"
+ },
+ "right": {
+ "feature" : "this_feature_doesnt_exist",
+ "threshold": "10.0f",
+ "left" : {
+ "value" : "50"
+ },
+ "right" : {
+ "value" : "75"
+ }
+ }
+ }
+ },
+ {
+ "weight" : "2f",
+ "root": {
+ "value" : "-10"
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json
new file mode 100644
index 0000000..ce884de
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json
@@ -0,0 +1,38 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"external_model_binary_feature",
+ "features":[
+ { "name": "user_device_smartphone"},
+ { "name": "user_device_tablet"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "user_device_smartphone",
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "0"
+ },
+ "right" : {
+ "value" : "50"
+ }
+
+ }},
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "user_device_tablet",
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "0"
+ },
+ "right" : {
+ "value" : "65"
+ }
+
+ }}
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json
new file mode 100644
index 0000000..96e304f
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json
@@ -0,0 +1,24 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_feature",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "-100"
+ },
+ "right": {
+ "value" : "75"
+ }
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json
new file mode 100644
index 0000000..e534696
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json
@@ -0,0 +1,14 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_features",
+ "params":{
+ "trees": [
+ {
+ "weight" : "2f",
+ "root": {
+ "value" : "-10"
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json
new file mode 100644
index 0000000..5564bc3
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json
@@ -0,0 +1,22 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_left",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "matchedTitle",
+ "threshold": "0.5f",
+ "right": {
+ "value" : "75"
+ }
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json
new file mode 100644
index 0000000..e48489d
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json
@@ -0,0 +1,8 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_params",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ]
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json
new file mode 100644
index 0000000..672716d
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json
@@ -0,0 +1,22 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_right",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "matchedTitle",
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "-100"
+ }
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json
new file mode 100644
index 0000000..2c6922f
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json
@@ -0,0 +1,24 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_threshold",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "1f",
+ "root": {
+ "feature": "matchedTitle",
+ "left" : {
+ "value" : "-100"
+ },
+ "right": {
+ "value" : "75"
+ }
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json
new file mode 100644
index 0000000..6d1ae71
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json
@@ -0,0 +1,15 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_tree",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "weight" : "2f"
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json
new file mode 100644
index 0000000..8576782
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json
@@ -0,0 +1,10 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_trees",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json
new file mode 100644
index 0000000..9dbda56
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json
@@ -0,0 +1,24 @@
+{
+ "class":"org.apache.solr.ltr.model.LambdaMARTModel",
+ "name":"lambdamartmodel_no_weight",
+ "features":[
+ { "name": "matchedTitle"},
+ { "name": "constantScoreToForceLambdaMARTScoreAllDocs"}
+ ],
+ "params":{
+ "trees": [
+ {
+ "root": {
+ "feature": "matchedTitle",
+ "threshold": "0.5f",
+ "left" : {
+ "value" : "-100"
+ },
+ "right": {
+ "value" : "75"
+ }
+ }
+ }
+ ]
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json b/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json
new file mode 100644
index 0000000..774958a
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json
@@ -0,0 +1,30 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"6029760550880411648",
+ "features":[
+ {"name":"title"},
+ {"name":"description"},
+ {"name":"keywords"},
+ {
+ "name":"popularity",
+ "norm": {
+ "class":"org.apache.solr.ltr.norm.MinMaxNormalizer",
+ "params":{ "min":"0.0f", "max":"10.0f" }
+ }
+ },
+ {"name":"text"},
+ {"name":"queryIntentPerson"},
+ {"name":"queryIntentCompany"}
+ ],
+ "params":{
+ "weights": {
+ "title": 0.0000000000,
+ "description": 0.1000000000,
+ "keywords": 0.2000000000,
+ "popularity": 0.3000000000,
+ "text": 0.4000000000,
+ "queryIntentPerson":0.1231231,
+ "queryIntentCompany":0.12121211
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json b/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json
new file mode 100644
index 0000000..f7bf902
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json
@@ -0,0 +1,14 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"svm-efi",
+ "features":[
+ {"name":"sampleConstant"},
+ {"name":"search_number_of_nights"}
+ ],
+ "params":{
+ "weights":{
+ "sampleConstant":1.0,
+ "search_number_of_nights":2.0
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json b/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json
new file mode 100644
index 0000000..d3cbfc4
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json
@@ -0,0 +1,20 @@
+{
+ "class":"org.apache.solr.ltr.model.RankSVMModel",
+ "name":"svm",
+ "features":[
+ {"name":"constant1"},
+ {"name":"constant2"},
+ {"name":"constant3"},
+ {"name":"constant4"},
+ {"name":"constant5"}
+ ],
+ "params":{
+ "weights":{
+ "constant1":1,
+ "constant2":2,
+ "constant3":3,
+ "constant4":4,
+ "constant5":5
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt
new file mode 100644
index 0000000..af55e6e
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt
@@ -0,0 +1,18 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+
+# some synonym groups specific to this example
+gb,gib,gigabyte,gigabytes
+mb,mib,megabyte,megabytes
+television, televisions, tv, tvs
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt
new file mode 100644
index 0000000..02cb4ac
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt
@@ -0,0 +1,20 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+# Some non-words that normally won't be encountered,
+# just to test that they won't be stemmed.
+
+offical
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
new file mode 100644
index 0000000..9492508
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml
@@ -0,0 +1,87 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ id
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
new file mode 100644
index 0000000..1a18471
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml
@@ -0,0 +1,65 @@
+
+
+
+
+ 6.0.0
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 15000
+ false
+
+
+ 1000
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ id
+
+
+
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml
new file mode 100644
index 0000000..fd0940a
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml
@@ -0,0 +1,69 @@
+
+
+
+
+ 6.0.0
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ 10
+ 10
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ 15000
+ false
+
+
+ 1000
+
+
+ ${solr.data.dir:}
+
+
+
+
+
+
+
+ explicit
+ json
+ true
+ id
+
+
+
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt
new file mode 100644
index 0000000..78f05c2
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# test that we can override the stemming algorithm with our own mappings
+# these must be tab-separated
+salty salt
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt
new file mode 100644
index 0000000..eabae3b
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+a
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt
new file mode 100644
index 0000000..0ef0e8d
--- /dev/null
+++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt
@@ -0,0 +1,28 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma