Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id 2151D200BB8 for ; Sat, 8 Oct 2016 00:06:16 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id 1FD73160AE8; Fri, 7 Oct 2016 22:06:16 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 68B3C160AF4 for ; Sat, 8 Oct 2016 00:06:13 +0200 (CEST) Received: (qmail 67017 invoked by uid 500); 7 Oct 2016 22:06:11 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 66519 invoked by uid 99); 7 Oct 2016 22:06:11 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 07 Oct 2016 22:06:11 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 5887FE09AC; Fri, 7 Oct 2016 22:06:11 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: cpoerschke@apache.org To: commits@lucene.apache.org Date: Fri, 07 Oct 2016 22:06:14 -0000 Message-Id: <1bb2a0631d254ae782e92ba18b05a387@git.apache.org> In-Reply-To: <3762255b53494eee82c646be86b5796b@git.apache.org> References: <3762255b53494eee82c646be86b5796b@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [4/8] lucene-solr:jira/solr-8542: SOLR-8542: Added Solr Learning to Rank (LTR) plugin for reranking results with machine learning models. 
(Michael Nilsson, Diego Ceccarelli, Joshua Pantony, Jon Dorando, Naveen Santhapuri, Alessandro Benedetti, David Groh archived-at: Fri, 07 Oct 2016 22:06:16 -0000 http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java new file mode 100644 index 0000000..c4addfb --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/ManagedModelStore.java @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.ltr.store.rest; + +import java.lang.invoke.MethodHandles; +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.core.SolrCore; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.ltr.feature.Feature; +import org.apache.solr.ltr.model.LTRScoringModel; +import org.apache.solr.ltr.model.ModelException; +import org.apache.solr.ltr.norm.IdentityNormalizer; +import org.apache.solr.ltr.norm.Normalizer; +import org.apache.solr.ltr.store.FeatureStore; +import org.apache.solr.ltr.store.ModelStore; +import org.apache.solr.response.SolrQueryResponse; +import org.apache.solr.rest.BaseSolrResource; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.rest.ManagedResourceStorage.StorageIO; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Managed resource for storing a model + */ +public class ManagedModelStore extends ManagedResource implements + ManagedResource.ChildResourceSupport { + + public static void registerManagedModelStore(SolrResourceLoader solrResourceLoader, + ManagedResourceObserver managedResourceObserver) { + solrResourceLoader.getManagedResourceRegistry().registerManagedResource( + REST_END_POINT, + ManagedModelStore.class, + managedResourceObserver); + } + + public static ManagedModelStore getManagedModelStore(SolrCore core) { + return (ManagedModelStore) core.getRestManager() + .getManagedResource(REST_END_POINT); + } + + /** the model store rest endpoint **/ + public static final String REST_END_POINT = "/schema/model-store"; + + /** name of the attribute containing the features used by the model **/ + private static final Object MODEL_FEATURE_LIST = "features"; + + /** + * Managed model store: 
the name of the attribute containing all the models of + * a model store + **/ + private static final String MODELS_JSON_FIELD = "models"; + + /** name of the attribute containing a class **/ + static final String CLASS_KEY = "class"; + /** name of the attribute containing the features **/ + static final String FEATURES_KEY = "features"; + /** name of the attribute containing a name **/ + static final String NAME_KEY = "name"; + /** name of the attribute containing a normalizer **/ + static final String NORM_KEY = "norm"; + /** name of the attribute containing parameters **/ + static final String PARAMS_KEY = "params"; + /** name of the attribute containing a store **/ + static final String STORE_KEY = "store"; + + private final ModelStore store; + private ManagedFeatureStore managedFeatureStore; + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + public ManagedModelStore(String resourceId, SolrResourceLoader loader, + StorageIO storageIO) throws SolrException { + super(resourceId, loader, storageIO); + store = new ModelStore(); + } + + public void setManagedFeatureStore(ManagedFeatureStore managedFeatureStore) { + log.info("INIT model store"); + this.managedFeatureStore = managedFeatureStore; + } + + public ManagedFeatureStore getManagedFeatureStore() { + return managedFeatureStore; + } + + private Object managedData; + + @SuppressWarnings("unchecked") + @Override + protected void onManagedDataLoadedFromStorage(NamedList managedInitArgs, + Object managedData) throws SolrException { + store.clear(); + // the managed models on the disk or on zookeeper will be loaded in a lazy + // way, since we need to set the managed features first (unfortunately + // managed resources do not + // decouple the creation of a managed resource with the reading of the data + // from the storage) + this.managedData = managedData; + + } + + public void loadStoredModels() { + log.info("------ managed models ~ loading ------"); + + if 
((managedData != null) && (managedData instanceof List)) { + final List> up = (List>) managedData; + for (final Map u : up) { + try { + final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore); + addModel(algo); + } catch (final ModelException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e); + } + } + } + } + + public synchronized void addModel(LTRScoringModel ltrScoringModel) throws ModelException { + try { + log.info("adding model {}", ltrScoringModel.getName()); + store.addModel(ltrScoringModel); + } catch (final ModelException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e); + } + } + + @SuppressWarnings("unchecked") + @Override + protected Object applyUpdatesToManagedData(Object updates) { + if (updates instanceof List) { + final List> up = (List>) updates; + for (final Map u : up) { + try { + final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, u, managedFeatureStore); + addModel(algo); + } catch (final ModelException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e); + } + } + } + + if (updates instanceof Map) { + final Map map = (Map) updates; + try { + final LTRScoringModel algo = fromLTRScoringModelMap(solrResourceLoader, map, managedFeatureStore); + addModel(algo); + } catch (final ModelException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e); + } + } + + return modelsAsManagedResources(store.getModels()); + } + + @Override + public synchronized void doDeleteChild(BaseSolrResource endpoint, String childId) { + if (childId.equals("*")) { + store.clear(); + } + if (store.containsModel(childId)) { + store.delete(childId); + } + storeManagedData(applyUpdatesToManagedData(null)); + } + + /** + * Called to retrieve a named part (the given childId) of the resource at the + * given endpoint. Note: since we have a unique child managed store we ignore + * the childId. 
+ */ + @Override + public void doGet(BaseSolrResource endpoint, String childId) { + + final SolrQueryResponse response = endpoint.getSolrResponse(); + response.add(MODELS_JSON_FIELD, + modelsAsManagedResources(store.getModels())); + } + + public LTRScoringModel getModel(String modelName) { + // this function replicates getModelStore().getModel(modelName), but + // it simplifies the testing (we can avoid to mock also a ModelStore). + return store.getModel(modelName); + } + + @Override + public String toString() { + return "ManagedModelStore [store=" + store + ", featureStores=" + + managedFeatureStore + "]"; + } + + /** + * Returns the available models as a list of Maps objects. After an update the + * managed resources needs to return the resources in this format in order to + * store in json somewhere (zookeeper, disk...) + * + * + * @return the available models as a list of Maps objects + */ + private static List modelsAsManagedResources(List models) { + final List list = new ArrayList<>(models.size()); + for (final LTRScoringModel model : models) { + list.add(toLTRScoringModelMap(model)); + } + return list; + } + + @SuppressWarnings("unchecked") + public static LTRScoringModel fromLTRScoringModelMap(SolrResourceLoader solrResourceLoader, + Map modelMap, ManagedFeatureStore managedFeatureStore) { + + final FeatureStore featureStore = + managedFeatureStore.getFeatureStore((String) modelMap.get(STORE_KEY)); + + final List features = new ArrayList<>(); + final List norms = new ArrayList<>(); + + final List featureList = (List) modelMap.get(FEATURES_KEY); + if (featureList != null) { + for (final Object feature : featureList) { + final Map featureMap = (Map) feature; + features.add(lookupFeatureFromFeatureMap(featureMap, featureStore)); + norms.add(createNormalizerFromFeatureMap(solrResourceLoader, featureMap)); + } + } + + return LTRScoringModel.getInstance(solrResourceLoader, + (String) modelMap.get(CLASS_KEY), // modelClassName + (String) modelMap.get(NAME_KEY), 
// modelName + features, + norms, + featureStore.getName(), + featureStore.getFeatures(), + (Map) modelMap.get(PARAMS_KEY)); + } + + private static LinkedHashMap toLTRScoringModelMap(LTRScoringModel model) { + final LinkedHashMap modelMap = new LinkedHashMap<>(5, 1.0f); + + modelMap.put(NAME_KEY, model.getName()); + modelMap.put(CLASS_KEY, model.getClass().getCanonicalName()); + modelMap.put(STORE_KEY, model.getFeatureStoreName()); + + final List> features = new ArrayList<>(); + final List featuresList = model.getFeatures(); + final List normsList = model.getNorms(); + for (int ii=0; ii featureMap, + FeatureStore featureStore) { + final String featureName = (String)featureMap.get(NAME_KEY); + return (featureName == null ? null + : featureStore.get(featureName)); + } + + @SuppressWarnings("unchecked") + private static Normalizer createNormalizerFromFeatureMap(SolrResourceLoader solrResourceLoader, + Map featureMap) { + final Map normMap = (Map)featureMap.get(NORM_KEY); + return (normMap == null ? 
IdentityNormalizer.INSTANCE + : fromNormalizerMap(solrResourceLoader, normMap)); + } + + private static LinkedHashMap toFeatureMap(Feature feature, Normalizer norm) { + final LinkedHashMap map = new LinkedHashMap(2, 1.0f); + map.put(NAME_KEY, feature.getName()); + map.put(NORM_KEY, toNormalizerMap(norm)); + return map; + } + + private static Normalizer fromNormalizerMap(SolrResourceLoader solrResourceLoader, + Map normMap) { + final String className = (String) normMap.get(CLASS_KEY); + + @SuppressWarnings("unchecked") + final Map params = (Map) normMap.get(PARAMS_KEY); + + return Normalizer.getInstance(solrResourceLoader, className, params); + } + + private static LinkedHashMap toNormalizerMap(Normalizer norm) { + final LinkedHashMap normalizer = new LinkedHashMap<>(2, 1.0f); + + normalizer.put(CLASS_KEY, norm.getClass().getCanonicalName()); + + final LinkedHashMap params = norm.paramsToMap(); + if (params != null) { + normalizer.put(PARAMS_KEY, params); + } + + return normalizer; + } + +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/package-info.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/package-info.java b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/package-info.java new file mode 100644 index 0000000..fbf7029 --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/ltr/store/rest/package-info.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Contains the {@link org.apache.solr.rest.ManagedResource} that encapsulate + * the feature and the model stores. + */ +package org.apache.solr.ltr.store.rest; http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/response/transform/LTRFeatureLoggerTransformerFactory.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/response/transform/LTRFeatureLoggerTransformerFactory.java b/solr/contrib/ltr/src/java/org/apache/solr/response/transform/LTRFeatureLoggerTransformerFactory.java new file mode 100644 index 0000000..605b990 --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/response/transform/LTRFeatureLoggerTransformerFactory.java @@ -0,0 +1,258 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.response.transform; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.Explanation; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.ltr.FeatureLogger; +import org.apache.solr.ltr.LTRRescorer; +import org.apache.solr.ltr.LTRScoringQuery; +import org.apache.solr.ltr.LTRScoringQuery.ModelWeight; +import org.apache.solr.ltr.LTRThreadModule; +import org.apache.solr.ltr.feature.Feature; +import org.apache.solr.ltr.SolrQueryRequestContextUtils; +import org.apache.solr.ltr.model.LTRScoringModel; +import org.apache.solr.ltr.norm.Normalizer; +import org.apache.solr.ltr.store.FeatureStore; +import org.apache.solr.ltr.store.rest.ManagedFeatureStore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.response.ResultContext; +import org.apache.solr.response.transform.DocTransformer; +import org.apache.solr.response.transform.TransformerFactory; +import org.apache.solr.search.LTRQParserPlugin; +import org.apache.solr.search.SolrIndexSearcher; +import org.apache.solr.util.SolrPluginUtils; + +/** + * This transformer will take care to generate and append in the response the + * features declared in the feature store of the current model. The class is + * useful if you are not interested in the reranking (e.g., bootstrapping a + * machine learning framework). 
+ */ +public class LTRFeatureLoggerTransformerFactory extends TransformerFactory { + + // used inside fl to specify the output format (csv/json) of the extracted features + private static final String FV_RESPONSE_WRITER = "fvwt"; + + // used inside fl to specify the format (dense|sparse) of the extracted features + private static final String FV_FORMAT = "format"; + + // used inside fl to specify the feature store to use for the feature extraction + private static final String FV_STORE = "store"; + + private static String DEFAULT_LOGGING_MODEL_NAME = "logging-model"; + + private String loggingModelName = DEFAULT_LOGGING_MODEL_NAME; + private String defaultFvStore; + private String defaultFvwt; + private String defaultFvFormat; + + private LTRThreadModule threadManager = null; + + public void setLoggingModelName(String loggingModelName) { + this.loggingModelName = loggingModelName; + } + + public void setStore(String defaultFvStore) { + this.defaultFvStore = defaultFvStore; + } + + public void setFvwt(String defaultFvwt) { + this.defaultFvwt = defaultFvwt; + } + + public void setFormat(String defaultFvFormat) { + this.defaultFvFormat = defaultFvFormat; + } + + @Override + public void init(@SuppressWarnings("rawtypes") NamedList args) { + super.init(args); + threadManager = LTRThreadModule.getInstance(args); + SolrPluginUtils.invokeSetters(this, args); + } + + @Override + public DocTransformer create(String name, SolrParams params, + SolrQueryRequest req) { + + // Hint to enable feature vector cache since we are requesting features + SolrQueryRequestContextUtils.setIsExtractingFeatures(req); + + // Communicate which feature store we are requesting features for + SolrQueryRequestContextUtils.setFvStoreName(req, params.get(FV_STORE, defaultFvStore)); + + // Create and supply the feature logger to be used + SolrQueryRequestContextUtils.setFeatureLogger(req, + FeatureLogger.createFeatureLogger( + params.get(FV_RESPONSE_WRITER, defaultFvwt), + params.get(FV_FORMAT, 
defaultFvFormat))); + + return new FeatureTransformer(name, params, req); + } + + class FeatureTransformer extends DocTransformer { + + final private String name; + final private SolrParams params; + final private SolrQueryRequest req; + + private List leafContexts; + private SolrIndexSearcher searcher; + private LTRScoringQuery scoringQuery; + private ModelWeight modelWeight; + private FeatureLogger featureLogger; + private boolean docsWereNotReranked; + + /** + * @param name + * Name of the field to be added in a document representing the + * feature vectors + */ + public FeatureTransformer(String name, SolrParams params, + SolrQueryRequest req) { + this.name = name; + this.params = params; + this.req = req; + } + + @Override + public String getName() { + return name; + } + + @Override + public void setContext(ResultContext context) { + super.setContext(context); + if (context == null) { + return; + } + if (context.getRequest() == null) { + return; + } + + searcher = context.getSearcher(); + if (searcher == null) { + throw new SolrException( + org.apache.solr.common.SolrException.ErrorCode.BAD_REQUEST, + "searcher is null"); + } + leafContexts = searcher.getTopReaderContext().leaves(); + + // Setup LTRScoringQuery + scoringQuery = SolrQueryRequestContextUtils.getScoringQuery(req); + docsWereNotReranked = (scoringQuery == null); + String featureStoreName = SolrQueryRequestContextUtils.getFvStoreName(req); + if (docsWereNotReranked || (featureStoreName != null && (!featureStoreName.equals(scoringQuery.getScoringModel().getFeatureStoreName())))) { + // if store is set in the transformer we should overwrite the logger + + final ManagedFeatureStore fr = ManagedFeatureStore.getManagedFeatureStore(req.getCore()); + + final FeatureStore store = fr.getFeatureStore(featureStoreName); + featureStoreName = store.getName(); // if featureStoreName was null before this gets actual name + + try { + final LoggingModel lm = new LoggingModel(loggingModelName, + featureStoreName, 
store.getFeatures()); + + scoringQuery = new LTRScoringQuery(lm, + LTRQParserPlugin.extractEFIParams(params), + true, + threadManager); // request feature weights to be created for all features + + // Local transformer efi if provided + scoringQuery.setOriginalQuery(context.getQuery()); + + }catch (final Exception e) { + throw new SolrException(ErrorCode.BAD_REQUEST, + "retrieving the feature store "+featureStoreName, e); + } + } + + if (scoringQuery.getFeatureLogger() == null){ + scoringQuery.setFeatureLogger( SolrQueryRequestContextUtils.getFeatureLogger(req) ); + } + scoringQuery.setRequest(req); + + featureLogger = scoringQuery.getFeatureLogger(); + + try { + modelWeight = scoringQuery.createWeight(searcher, true, 1f); + } catch (final IOException e) { + throw new SolrException(ErrorCode.BAD_REQUEST, e.getMessage(), e); + } + if (modelWeight == null) { + throw new SolrException(ErrorCode.BAD_REQUEST, + "error logging the features, model weight is null"); + } + } + + @Override + public void transform(SolrDocument doc, int docid, float score) + throws IOException { + Object fv = featureLogger.getFeatureVector(docid, scoringQuery, searcher); + if (fv == null) { // FV for this document was not in the cache + fv = featureLogger.makeFeatureVector( + LTRRescorer.extractFeaturesInfo( + modelWeight, + docid, + (docsWereNotReranked ? 
new Float(score) : null), + leafContexts)); + } + + doc.addField(name, fv); + } + + } + + private static class LoggingModel extends LTRScoringModel { + + public LoggingModel(String name, String featureStoreName, List allFeatures){ + this(name, Collections.emptyList(), Collections.emptyList(), + featureStoreName, allFeatures, Collections.emptyMap()); + } + + protected LoggingModel(String name, List features, + List norms, String featureStoreName, + List allFeatures, Map params) { + super(name, features, norms, featureStoreName, allFeatures, params); + } + + @Override + public float score(float[] modelFeatureValuesNormalized) { + return 0; + } + + @Override + public Explanation explain(LeafReaderContext context, int doc, float finalScore, List featureExplanations) { + return Explanation.match(finalScore, toString() + + " logging model, used only for logging the features"); + } + + } + +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/response/transform/package-info.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/response/transform/package-info.java b/solr/contrib/ltr/src/java/org/apache/solr/response/transform/package-info.java new file mode 100644 index 0000000..7b48f36 --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/response/transform/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * APIs and implementations of {@link org.apache.solr.response.transform.DocTransformer} for modifying documents in Solr request responses + */ +package org.apache.solr.response.transform; + + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/search/LTRQParserPlugin.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/search/LTRQParserPlugin.java b/solr/contrib/ltr/src/java/org/apache/solr/search/LTRQParserPlugin.java new file mode 100644 index 0000000..ad2e81c --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/search/LTRQParserPlugin.java @@ -0,0 +1,237 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.search; + +import java.io.IOException; +import java.lang.invoke.MethodHandles; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.apache.lucene.analysis.util.ResourceLoader; +import org.apache.lucene.analysis.util.ResourceLoaderAware; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.SolrException.ErrorCode; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.ltr.LTRRescorer; +import org.apache.solr.ltr.LTRThreadModule; +import org.apache.solr.ltr.LTRScoringQuery; +import org.apache.solr.ltr.SolrQueryRequestContextUtils; +import org.apache.solr.ltr.model.LTRScoringModel; +import org.apache.solr.core.SolrResourceLoader; +import org.apache.solr.ltr.store.rest.ManagedFeatureStore; +import org.apache.solr.ltr.store.rest.ManagedModelStore; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.rest.ManagedResource; +import org.apache.solr.rest.ManagedResourceObserver; +import org.apache.solr.search.QParser; +import org.apache.solr.search.QParserPlugin; +import org.apache.solr.search.SyntaxError; +import org.apache.solr.util.SolrPluginUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Plug into solr a rerank model. 
+ * + * Learning to Rank Query Parser Syntax: rq={!ltr model=6029760550880411648 reRankDocs=300 + * efi.myCompanyQueryIntent=0.98} + * + */ +public class LTRQParserPlugin extends QParserPlugin implements ResourceLoaderAware, ManagedResourceObserver { + public static final String NAME = "ltr"; + private static Query defaultQuery = new MatchAllDocsQuery(); + + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + // params for setting custom external info that features can use, like query + // intent + static final String EXTERNAL_FEATURE_INFO = "efi."; + + private ManagedFeatureStore fr = null; + private ManagedModelStore mr = null; + + private LTRThreadModule threadManager = null; + + /** query parser plugin: the name of the attribute for setting the model **/ + public static final String MODEL = "model"; + + /** query parser plugin: default number of documents to rerank **/ + public static final int DEFAULT_RERANK_DOCS = 200; + + /** + * query parser plugin: the param that selects the number of documents + * to rerank + **/ + public static final String RERANK_DOCS = "reRankDocs"; + + @Override + public void init(@SuppressWarnings("rawtypes") NamedList args) { + super.init(args); + threadManager = LTRThreadModule.getInstance(args); + SolrPluginUtils.invokeSetters(this, args); + } + + @Override + public QParser createParser(String qstr, SolrParams localParams, + SolrParams params, SolrQueryRequest req) { + return new LTRQParser(qstr, localParams, params, req); + } + + /** + * Given a set of local SolrParams, extract all of the efi.key=value params into a map + * @param localParams Local request parameters that might contain efi params + * @return Map of efi params, where the key is the name of the efi param, and the + * value is the value of the efi param + */ + public static Map extractEFIParams(SolrParams localParams) { + final Map externalFeatureInfo = new HashMap<>(); + for (final Iterator it = 
localParams.getParameterNamesIterator(); it + .hasNext();) { + final String name = it.next(); + if (name.startsWith(EXTERNAL_FEATURE_INFO)) { + externalFeatureInfo.put( + name.substring(EXTERNAL_FEATURE_INFO.length()), + new String[] {localParams.get(name)}); + } + } + return externalFeatureInfo; + } + + + @Override + public void inform(ResourceLoader loader) throws IOException { + final SolrResourceLoader solrResourceLoader = (SolrResourceLoader) loader; + ManagedFeatureStore.registerManagedFeatureStore(solrResourceLoader, this); + ManagedModelStore.registerManagedModelStore(solrResourceLoader, this); + } + + @Override + public void onManagedResourceInitialized(NamedList args, ManagedResource res) throws SolrException { + if (res instanceof ManagedFeatureStore) { + fr = (ManagedFeatureStore)res; + } + if (res instanceof ManagedModelStore){ + mr = (ManagedModelStore)res; + } + if (mr != null && fr != null){ + mr.setManagedFeatureStore(fr); + // now we can safely load the models + mr.loadStoredModels(); + + } + } + + public class LTRQParser extends QParser { + + public LTRQParser(String qstr, SolrParams localParams, SolrParams params, + SolrQueryRequest req) { + super(qstr, localParams, params, req); + } + + @Override + public Query parse() throws SyntaxError { + // ReRanking Model + final String modelName = localParams.get(LTRQParserPlugin.MODEL); + if ((modelName == null) || modelName.isEmpty()) { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, + "Must provide model in the request"); + } + + final LTRScoringModel ltrScoringModel = mr.getModel(modelName); + if (ltrScoringModel == null) { + throw new SolrException(ErrorCode.BAD_REQUEST, + "cannot find " + LTRQParserPlugin.MODEL + " " + modelName); + } + + final String modelFeatureStoreName = ltrScoringModel.getFeatureStoreName(); + final boolean extractFeatures = SolrQueryRequestContextUtils.isExtractingFeatures(req); + final String fvStoreName = SolrQueryRequestContextUtils.getFvStoreName(req); + // 
Check if features are requested and if the model feature store and feature-transform feature store are the same + final boolean featuresRequestedFromSameStore = (modelFeatureStoreName.equals(fvStoreName) || fvStoreName == null) ? extractFeatures:false; + + final LTRScoringQuery scoringQuery = new LTRScoringQuery(ltrScoringModel, + extractEFIParams(localParams), + featuresRequestedFromSameStore, threadManager); + + // Enable the feature vector caching if we are extracting features, and the features + // we requested are the same ones we are reranking with + if (featuresRequestedFromSameStore) { + scoringQuery.setFeatureLogger( SolrQueryRequestContextUtils.getFeatureLogger(req) ); + } + SolrQueryRequestContextUtils.setScoringQuery(req, scoringQuery); + + int reRankDocs = localParams.getInt(RERANK_DOCS, DEFAULT_RERANK_DOCS); + reRankDocs = Math.max(1, reRankDocs); + + // External features + scoringQuery.setRequest(req); + + return new LTRQuery(scoringQuery, reRankDocs); + } + } + + /** + * A learning to rank Query, will incapsulate a learning to rank model, and delegate to it the rescoring + * of the documents. 
+ **/ + public class LTRQuery extends AbstractReRankQuery { + private final LTRScoringQuery scoringQuery; + + public LTRQuery(LTRScoringQuery scoringQuery, int reRankDocs) { + super(defaultQuery, reRankDocs, new LTRRescorer(scoringQuery)); + this.scoringQuery = scoringQuery; + } + + @Override + public int hashCode() { + return 31 * classHash() + (mainQuery.hashCode() + scoringQuery.hashCode() + reRankDocs); + } + + @Override + public boolean equals(Object o) { + return sameClassAs(o) && equalsTo(getClass().cast(o)); + } + + private boolean equalsTo(LTRQuery other) { + return (mainQuery.equals(other.mainQuery) + && scoringQuery.equals(other.scoringQuery) && (reRankDocs == other.reRankDocs)); + } + + @Override + public RankQuery wrap(Query _mainQuery) { + super.wrap(_mainQuery); + scoringQuery.setOriginalQuery(_mainQuery); + return this; + } + + @Override + public String toString(String field) { + return "{!ltr mainQuery='" + mainQuery.toString() + "' scoringQuery='" + + scoringQuery.toString() + "' reRankDocs=" + reRankDocs + "}"; + } + + @Override + protected Query rewrite(Query rewrittenMainQuery) throws IOException { + return new LTRQuery(scoringQuery, reRankDocs).wrap(rewrittenMainQuery); + } + } + +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/org/apache/solr/search/package-info.java ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/org/apache/solr/search/package-info.java b/solr/contrib/ltr/src/java/org/apache/solr/search/package-info.java new file mode 100644 index 0000000..67940b6 --- /dev/null +++ b/solr/contrib/ltr/src/java/org/apache/solr/search/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * APIs and classes for {@linkplain org.apache.solr.search.QParserPlugin parsing} and {@linkplain org.apache.solr.search.SolrIndexSearcher processing} search requests + */ +package org.apache.solr.search; + + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/java/overview.html ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/java/overview.html b/solr/contrib/ltr/src/java/overview.html new file mode 100644 index 0000000..d27aa9e --- /dev/null +++ b/solr/contrib/ltr/src/java/overview.html @@ -0,0 +1,91 @@ + + + +Apache Solr Search Server: Learning to Rank Contrib + +

+This module contains the logic to plug machine-learned ranking models into Solr. +

+

+In information retrieval systems, Learning to Rank is used to re-rank the top X +retrieved documents using trained machine learning models. The hope is +that sophisticated models can make more nuanced ranking decisions than standard ranking +functions like TF-IDF or BM25. +

+

+This module allows plugging a reranking component directly into Solr, enabling users +to easily build their own learning to rank systems and access the rich +matching features readily available in Solr. It also provides tools to perform +feature engineering and feature extraction. +

+

Code structure

+

+A Learning to Rank model is plugged into the ranking through the {@link org.apache.solr.search.LTRQParserPlugin}, +a {@link org.apache.solr.search.QParserPlugin}. The plugin will +read from the request the name of the model (an instance of {@link org.apache.solr.ltr.model.LTRScoringModel}) +to use for the request, plus other +parameters. The plugin will generate a {@link org.apache.solr.search.LTRQParserPlugin.LTRQuery LTRQuery}: +a particular {@link org.apache.solr.search.RankQuery} +that will encapsulate the given model and use it to +rescore and rerank the documents (by using an {@link org.apache.solr.ltr.LTRRescorer}). +

+

+A model will be applied on each document through a {@link org.apache.solr.ltr.LTRScoringQuery}, a +subclass of {@link org.apache.lucene.search.Query}. As a normal query, +the learned model will produce a new score +for each document reranked. +

+

+A {@link org.apache.solr.ltr.LTRScoringQuery} is created by providing an instance of +{@link org.apache.solr.ltr.model.LTRScoringModel}. An instance of +{@link org.apache.solr.ltr.model.LTRScoringModel} +defines how to combine the features in order to create a new +score for a document. A new learning to rank model is plugged +into the framework by extending {@link org.apache.solr.ltr.model.LTRScoringModel}, +(see for example {@link org.apache.solr.ltr.model.LambdaMARTModel} and {@link org.apache.solr.ltr.model.RankSVMModel}). +

+

+The {@link org.apache.solr.ltr.LTRScoringQuery} will take care of computing the values of +all the features (see {@link org.apache.solr.ltr.feature.Feature}) and then will delegate the final score +generation to the {@link org.apache.solr.ltr.model.LTRScoringModel}, by calling the method +{@link org.apache.solr.ltr.model.LTRScoringModel#score(float[] modelFeatureValuesNormalized) score(float[] modelFeatureValuesNormalized)}. +

+

+A {@link org.apache.solr.ltr.feature.Feature} will produce a particular value for each document, so +it is modeled as a {@link org.apache.lucene.search.Query}. The package +{@link org.apache.solr.ltr.feature} contains several examples +of features. One benefit of extending the Query object is that we can reuse +Query as a feature, see for example {@link org.apache.solr.ltr.feature.SolrFeature}. +Features for a document can also be returned in the response by +using {@link org.apache.solr.response.transform.DocTransformer DocTransformer} +provided by {@link org.apache.solr.response.transform.LTRFeatureLoggerTransformerFactory}. +

+

+{@link org.apache.solr.ltr.store} contains all the logic to store all the features and the models +added by a user. Models are registered into a unique {@link org.apache.solr.ltr.store.ModelStore ModelStore}, +and each model specifies a particular {@link org.apache.solr.ltr.store.FeatureStore FeatureStore} that +will contain a particular subset of features. +

+

+A user can manage features and models through a REST API, provided by the +{@link org.apache.solr.rest.ManagedResource Managed Resources} +{@link org.apache.solr.ltr.store.rest.ManagedFeatureStore ManagedFeatureStore} +and {@link org.apache.solr.ltr.store.rest.ManagedModelStore ManagedModelStore}. +

+ + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/external_features.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/external_features.json b/solr/contrib/ltr/src/test-files/featureExamples/external_features.json new file mode 100644 index 0000000..6c0cfa6 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/external_features.json @@ -0,0 +1,51 @@ +[ { + "name" : "matchedTitle", + "class" : "org.apache.solr.ltr.feature.SolrFeature", + "params" : { + "q" : "{!terms f=title}${user_query}" + } +}, { + "name" : "confidence", + "class" : "org.apache.solr.ltr.feature.ValueFeature", + "store": "fstore2", + "params" : { + "value" : "${myconf}" + } +}, { + "name":"originalScore", + "class":"org.apache.solr.ltr.feature.OriginalScoreFeature", + "store": "fstore2", + "params":{} +}, { + "name" : "occurrences", + "class" : "org.apache.solr.ltr.feature.ValueFeature", + "store": "fstore3", + "params" : { + "value" : "${myOcc}", + "required" : false + } +}, { + "name":"originalScore", + "class":"org.apache.solr.ltr.feature.OriginalScoreFeature", + "store": "fstore3", + "params":{} +}, { + "name" : "popularity", + "class" : "org.apache.solr.ltr.feature.ValueFeature", + "store": "fstore4", + "params" : { + "value" : "${myPop}", + "required" : true + } +}, { + "name":"originalScore", + "class":"org.apache.solr.ltr.feature.OriginalScoreFeature", + "store": "fstore4", + "params":{} +}, { + "name" : "titlePhraseMatch", + "class" : "org.apache.solr.ltr.feature.SolrFeature", + "params" : { + "q" : "{!field f=title}${user_query}" + } +} ] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json ---------------------------------------------------------------------- diff --git 
a/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json b/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json new file mode 100644 index 0000000..52bab27 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/external_features_for_sparse_processing.json @@ -0,0 +1,18 @@ +[{ + "name" : "user_device_smartphone", + "class":"org.apache.solr.ltr.feature.ValueFeature", + "params" : { + "value": "${user_device_smartphone}" + } +}, + { + "name" : "user_device_tablet", + "class":"org.apache.solr.ltr.feature.ValueFeature", + "params" : { + "value": "${user_device_tablet}" + } + } + + + +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json new file mode 100644 index 0000000..e05542a --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm-efi.json @@ -0,0 +1,17 @@ +[ + { + "name": "sampleConstant", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 5 + } + }, + { + "name" : "search_number_of_nights", + "class":"org.apache.solr.ltr.feature.ValueFeature", + "params" : { + "value": "${search_number_of_nights}" + } + } + +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json new file mode 100644 index 0000000..8cc2996 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/features-ranksvm.json @@ -0,0 
+1,51 @@ +[ + { + "name": "title", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 1 + } + }, + { + "name": "description", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 2 + } + }, + { + "name": "keywords", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 2 + } + }, + { + "name": "popularity", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 3 + } + }, + { + "name": "text", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 4 + } + }, + { + "name": "queryIntentPerson", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 5 + } + }, + { + "name": "queryIntentCompany", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 5 + } + } +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json b/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json new file mode 100644 index 0000000..69aad84 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/features-store-test-model.json @@ -0,0 +1,51 @@ +[ + { + "name": "constant1", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "store":"test", + "params": { + "value": 1 + } + }, + { + "name": "constant2", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "store":"test", + "params": { + "value": 2 + } + }, + { + "name": "constant3", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "store":"test", + "params": { + "value": 3 + } + }, + { + "name": "constant4", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "store":"test", + "params": { + "value": 4 + } + }, + { + "name": "constant5", + "class": 
"org.apache.solr.ltr.feature.ValueFeature", + "store":"test", + "params": { + "value": 5 + } + }, + { + "name": "pop", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "store":"test", + "params": { + "field": "popularity" + } + } + +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json b/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json new file mode 100644 index 0000000..13968f9 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/fq_features.json @@ -0,0 +1,16 @@ +[ + { + "name": "matchedTitle", + "class": "org.apache.solr.ltr.feature.SolrFeature", + "params": { + "q": "{!terms f=title}${user_query}" + } + }, + { + "name": "popularity", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 3 + } + } +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json b/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json new file mode 100644 index 0000000..3bc2c77 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/featureExamples/lambdamart_features.json @@ -0,0 +1,16 @@ +[ + { + "name": "matchedTitle", + "class": "org.apache.solr.ltr.feature.SolrFeature", + "params": { + "q": "{!terms f=title}${user_query}" + } + }, + { + "name": "constantScoreToForceLambdaMARTScoreAllDocs", + "class": "org.apache.solr.ltr.feature.ValueFeature", + "params": { + "value": 1 + } + } +] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/log4j.properties 
---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/log4j.properties b/solr/contrib/ltr/src/test-files/log4j.properties new file mode 100644 index 0000000..d86c698 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/log4j.properties @@ -0,0 +1,32 @@ +# Logging level +log4j.rootLogger=INFO, CONSOLE + +log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender +log4j.appender.CONSOLE.Target=System.err +log4j.appender.CONSOLE.layout=org.apache.log4j.EnhancedPatternLayout +log4j.appender.CONSOLE.layout.ConversionPattern=%-4r %-5p (%t) [%X{node_name} %X{collection} %X{shard} %X{replica} %X{core}] %c{1.} %m%n +log4j.logger.org.apache.zookeeper=WARN +log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.apache.directory=WARN +log4j.logger.org.apache.solr.hadoop=INFO +log4j.logger.org.apache.solr.client.solrj.embedded.JettySolrRunner=DEBUG +org.apache.solr.client.solrj.embedded.JettySolrRunner=DEBUG + +#log4j.logger.org.apache.solr.update.processor.LogUpdateProcessor=DEBUG +#log4j.logger.org.apache.solr.update.processor.DistributedUpdateProcessor=DEBUG +#log4j.logger.org.apache.solr.update.PeerSync=DEBUG +#log4j.logger.org.apache.solr.core.CoreContainer=DEBUG +#log4j.logger.org.apache.solr.cloud.RecoveryStrategy=DEBUG +#log4j.logger.org.apache.solr.cloud.SyncStrategy=DEBUG +#log4j.logger.org.apache.solr.handler.admin.CoreAdminHandler=DEBUG +#log4j.logger.org.apache.solr.cloud.ZkController=DEBUG +#log4j.logger.org.apache.solr.update.DefaultSolrCoreState=DEBUG +#log4j.logger.org.apache.solr.common.cloud.ConnectionManager=DEBUG +#log4j.logger.org.apache.solr.update.UpdateLog=DEBUG +#log4j.logger.org.apache.solr.cloud.ChaosMonkey=DEBUG +#log4j.logger.org.apache.solr.update.TransactionLog=DEBUG +#log4j.logger.org.apache.solr.handler.ReplicationHandler=DEBUG +#log4j.logger.org.apache.solr.handler.IndexFetcher=DEBUG + +#log4j.logger.org.apache.solr.common.cloud.ClusterStateUtil=DEBUG 
+#log4j.logger.org.apache.solr.cloud.OverseerAutoReplicaFailoverThread=DEBUG http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/external_model.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/external_model.json b/solr/contrib/ltr/src/test-files/modelExamples/external_model.json new file mode 100644 index 0000000..c7f35c0 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/external_model.json @@ -0,0 +1,12 @@ +{ + "class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"externalmodel", + "features":[ + { "name": "matchedTitle"} + ], + "params":{ + "weights": { + "matchedTitle": 0.999 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json b/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json new file mode 100644 index 0000000..227a943 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/external_model_store.json @@ -0,0 +1,13 @@ +{ + "class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"externalmodelstore", + "store": "fstore2", + "features":[ + { "name": "confidence"} + ], + "params":{ + "weights": { + "confidence": 0.999 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json b/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json new file mode 100644 index 0000000..c0fa77a --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/fq-model.json @@ -0,0 +1,20 @@ +{ + 
"class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"fqmodel", + "features":[ + { + "name":"matchedTitle", + "norm": { + "class":"org.apache.solr.ltr.norm.MinMaxNormalizer", + "params":{ "min":"0.0f", "max":"10.0f" } + } + }, + { "name":"popularity"} + ], + "params":{ + "weights": { + "matchedTitle": 0.5, + "popularity": 0.5 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json new file mode 100644 index 0000000..782e641 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel.json @@ -0,0 +1,38 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "feature": "matchedTitle", + "threshold": "0.5f", + "left" : { + "value" : "-100" + }, + "right": { + "feature" : "this_feature_doesnt_exist", + "threshold": "10.0f", + "left" : { + "value" : "50" + }, + "right" : { + "value" : "75" + } + } + } + }, + { + "weight" : "2f", + "root": { + "value" : "-10" + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json new file mode 100644 index 0000000..ce884de --- /dev/null +++ 
b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_external_binary_features.json @@ -0,0 +1,38 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"external_model_binary_feature", + "features":[ + { "name": "user_device_smartphone"}, + { "name": "user_device_tablet"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "feature": "user_device_smartphone", + "threshold": "0.5f", + "left" : { + "value" : "0" + }, + "right" : { + "value" : "50" + } + + }}, + { + "weight" : "1f", + "root": { + "feature": "user_device_tablet", + "threshold": "0.5f", + "left" : { + "value" : "0" + }, + "right" : { + "value" : "65" + } + + }} + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json new file mode 100644 index 0000000..96e304f --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_feature.json @@ -0,0 +1,24 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_feature", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "threshold": "0.5f", + "left" : { + "value" : "-100" + }, + "right": { + "value" : "75" + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json new 
file mode 100644 index 0000000..e534696 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_features.json @@ -0,0 +1,14 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_features", + "params":{ + "trees": [ + { + "weight" : "2f", + "root": { + "value" : "-10" + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json new file mode 100644 index 0000000..5564bc3 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_left.json @@ -0,0 +1,22 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_left", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "feature": "matchedTitle", + "threshold": "0.5f", + "right": { + "value" : "75" + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json new file mode 100644 index 0000000..e48489d --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_params.json @@ -0,0 +1,8 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_params", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ] +} 
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json new file mode 100644 index 0000000..672716d --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_right.json @@ -0,0 +1,22 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_right", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "feature": "matchedTitle", + "threshold": "0.5f", + "left" : { + "value" : "-100" + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json new file mode 100644 index 0000000..2c6922f --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_threshold.json @@ -0,0 +1,24 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_threshold", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "1f", + "root": { + "feature": "matchedTitle", + "left" : { + "value" : "-100" + }, + "right": { + "value" : "75" + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json 
---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json new file mode 100644 index 0000000..6d1ae71 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_tree.json @@ -0,0 +1,15 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_tree", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "weight" : "2f" + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json new file mode 100644 index 0000000..8576782 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_trees.json @@ -0,0 +1,10 @@ +{ + "class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_trees", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json new file mode 100644 index 0000000..9dbda56 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/lambdamartmodel_no_weight.json @@ -0,0 +1,24 @@ +{ + 
"class":"org.apache.solr.ltr.model.LambdaMARTModel", + "name":"lambdamartmodel_no_weight", + "features":[ + { "name": "matchedTitle"}, + { "name": "constantScoreToForceLambdaMARTScoreAllDocs"} + ], + "params":{ + "trees": [ + { + "root": { + "feature": "matchedTitle", + "threshold": "0.5f", + "left" : { + "value" : "-100" + }, + "right": { + "value" : "75" + } + } + } + ] + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json b/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json new file mode 100644 index 0000000..774958a --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/ranksvm-model.json @@ -0,0 +1,30 @@ +{ + "class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"6029760550880411648", + "features":[ + {"name":"title"}, + {"name":"description"}, + {"name":"keywords"}, + { + "name":"popularity", + "norm": { + "class":"org.apache.solr.ltr.norm.MinMaxNormalizer", + "params":{ "min":"0.0f", "max":"10.0f" } + } + }, + {"name":"text"}, + {"name":"queryIntentPerson"}, + {"name":"queryIntentCompany"} + ], + "params":{ + "weights": { + "title": 0.0000000000, + "description": 0.1000000000, + "keywords": 0.2000000000, + "popularity": 0.3000000000, + "text": 0.4000000000, + "queryIntentPerson":0.1231231, + "queryIntentCompany":0.12121211 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json b/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json new file mode 100644 index 0000000..f7bf902 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/svm-model-efi.json 
@@ -0,0 +1,14 @@ +{ + "class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"svm-efi", + "features":[ + {"name":"sampleConstant"}, + {"name":"search_number_of_nights"} + ], + "params":{ + "weights":{ + "sampleConstant":1.0, + "search_number_of_nights":2.0 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json b/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json new file mode 100644 index 0000000..d3cbfc4 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/modelExamples/svm-model.json @@ -0,0 +1,20 @@ +{ + "class":"org.apache.solr.ltr.model.RankSVMModel", + "name":"svm", + "features":[ + {"name":"constant1"}, + {"name":"constant2"}, + {"name":"constant3"}, + {"name":"constant4"}, + {"name":"constant5"} + ], + "params":{ + "weights":{ + "constant1":1, + "constant2":2, + "constant3":3, + "constant4":4, + "constant5":5 + } + } +} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt new file mode 100644 index 0000000..af55e6e --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/indexSynonyms.txt @@ -0,0 +1,18 @@ +# the asf licenses this file to you under the apache license, version 2.0 +# (the "license"); you may not use this file except in compliance with +# the license. 
you may obtain a copy of the license at +# +# http://www.apache.org/licenses/license-2.0 +# +# unless required by applicable law or agreed to in writing, software +# distributed under the license is distributed on an "as is" basis, +# without warranties or conditions of any kind, either express or implied. +# see the license for the specific language governing permissions and +# limitations under the license. + +#----------------------------------------------------------------------- + +# some synonym groups specific to this example +gb,gib,gigabyte,gigabytes +mb,mib,megabyte,megabytes +television, televisions, tv, tvs http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt new file mode 100644 index 0000000..02cb4ac --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/protwords.txt @@ -0,0 +1,20 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +# Use a protected word file to protect against the stemmer reducing two +# unrelated words to the same base word. + +# Some non-words that normally won't be encountered, +# just to test that they won't be stemmed. 
+ +offical http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml new file mode 100644 index 0000000..9492508 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/schema-ltr.xml @@ -0,0 +1,87 @@ + + + + + + + + + + + + + + + + + + id + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml new file mode 100644 index 0000000..1a18471 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr.xml @@ -0,0 +1,65 @@ + + + + + 6.0.0 + ${solr.data.dir:} + + + + + + + + + + + + + + + + + + + 15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + + + explicit + json + true + id + + + + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml new file mode 100644 index 0000000..fd0940a --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/solrconfig-ltr_Th10_10.xml @@ -0,0 +1,69 @@ + + + + + 6.0.0 + ${solr.data.dir:} + + + + + + + + 10 + 10 + + + + + + + + + + + + + + + 
15000 + false + + + 1000 + + + ${solr.data.dir:} + + + + + + + + explicit + json + true + id + + + + http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt new file mode 100644 index 0000000..78f05c2 --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stemdict.txt @@ -0,0 +1,19 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +#----------------------------------------------------------------------- +# test that we can override the stemming algorithm with our own mappings +# these must be tab-separated +salty salt http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt new file mode 100644 index 0000000..eabae3b --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/stopwords.txt @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +a http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/f2a8e8ac/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt ---------------------------------------------------------------------- diff --git a/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt new file mode 100644 index 0000000..0ef0e8d --- /dev/null +++ b/solr/contrib/ltr/src/test-files/solr/collection1/conf/synonyms.txt @@ -0,0 +1,28 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +#----------------------------------------------------------------------- +#some test synonym mappings unlikely to appear in real input text +aaafoo => aaabar +bbbfoo => bbbfoo bbbbar +cccfoo => cccbar cccbaz +fooaaa,baraaa,bazaaa + +# Some synonym groups specific to this example +GB,gib,gigabyte,gigabytes +MB,mib,megabyte,megabytes +Television, Televisions, TV, TVs +#notice we use "gib" instead of "GiB" so any WordDelimiterFilter coming +#after us won't split it into two words. + +# Synonym mappings can be used for spelling correction too +pixima => pixma