Return-Path: X-Original-To: archive-asf-public-internal@cust-asf2.ponee.io Delivered-To: archive-asf-public-internal@cust-asf2.ponee.io Received: from cust-asf.ponee.io (cust-asf.ponee.io [163.172.22.183]) by cust-asf2.ponee.io (Postfix) with ESMTP id C1148200C56 for ; Thu, 30 Mar 2017 19:24:36 +0200 (CEST) Received: by cust-asf.ponee.io (Postfix) id BF837160BA6; Thu, 30 Mar 2017 17:24:36 +0000 (UTC) Delivered-To: archive-asf-public@cust-asf.ponee.io Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by cust-asf.ponee.io (Postfix) with SMTP id 7F5BE160BA3 for ; Thu, 30 Mar 2017 19:24:34 +0200 (CEST) Received: (qmail 10600 invoked by uid 500); 30 Mar 2017 17:24:32 -0000 Mailing-List: contact commits-help@lucene.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@lucene.apache.org Delivered-To: mailing list commits@lucene.apache.org Received: (qmail 9791 invoked by uid 99); 30 Mar 2017 17:24:30 -0000 Received: from git1-us-west.apache.org (HELO git1-us-west.apache.org) (140.211.11.23) by apache.org (qpsmtpd/0.29) with ESMTP; Thu, 30 Mar 2017 17:24:30 +0000 Received: by git1-us-west.apache.org (ASF Mail Server at git1-us-west.apache.org, from userid 33) id 3AA4BE1794; Thu, 30 Mar 2017 17:24:30 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: cpoerschke@apache.org To: commits@lucene.apache.org Date: Thu, 30 Mar 2017 17:24:38 -0000 Message-Id: <036cc038c7bf4aa28f1ad57350058d5d@git.apache.org> In-Reply-To: References: X-Mailer: ASF-Git Admin Mailer Subject: [09/73] [abbrv] lucene-solr:jira/solr-6203: SOLR-9221: Remove Solr contribs: map-reduce, morphlines-core and morphlines-cell archived-at: Thu, 30 Mar 2017 17:24:36 -0000 http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-documents/testXML2.xml ---------------------------------------------------------------------- diff --git 
a/solr/contrib/morphlines-core/src/test-files/test-documents/testXML2.xml b/solr/contrib/morphlines-core/src/test-files/test-documents/testXML2.xml deleted file mode 100644 index 6611ee1..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-documents/testXML2.xml +++ /dev/null @@ -1,22 +0,0 @@ - - - - 123 - Hello World - Solr rocks - http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/loadSolrBasic.conf ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/loadSolrBasic.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/loadSolrBasic.conf deleted file mode 100644 index 1c02a9a..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/loadSolrBasic.conf +++ /dev/null @@ -1,68 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- - -# morphline.conf example file -# this is a comment -// this is yet another comment - -# for details see https://github.com/typesafehub/config#optional-system-or-env-variable-overrides -SOLR_COLLECTION : "collection1" -SOLR_COLLECTION : ${?ENV_SOLR_COLLECTION} - -ZK_HOST : "127.0.0.1:2181/solr" -ZK_HOST : ${?ENV_ZK_HOST} - -SOLR_HOME_DIR : "example/solr/collection1" -SOLR_HOME_DIR : ${?ENV_SOLR_HOME_DIR} - -SOLR_LOCATOR : { - collection : ${SOLR_COLLECTION} - zkHost : ${ZK_HOST} - solrHomeDir : ${SOLR_HOME_DIR} - # batchSize : 1000 -} -SOLR_LOCATOR : ${?ENV_SOLR_LOCATOR} - -morphlines : [ - { - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. (see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.apache.solr.morphlines.solr.SanitizeUnknownSolrFieldsBuilder", - "org.apache.solr.morphlines.solr.LoadSolrBuilder"] - - commands : [ - { - sanitizeUnknownSolrFields { - solrLocator : ${SOLR_LOCATOR} - } - } - - { - loadSolr { - solrLocator : ${SOLR_LOCATOR} - boosts : { - id : 1.0 - } - } - } - - { logDebug { format : "output record: {}", args : ["@{}"] } } - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellDocumentTypes.conf ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellDocumentTypes.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellDocumentTypes.conf deleted file mode 100644 index 4d38256..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellDocumentTypes.conf +++ /dev/null @@ -1,304 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Application configuration file in HOCON format (Human-Optimized Config Object Notation). -# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md -# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). -# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html - -# morphline.conf example file -# this is a comment -// this is yet another comment - -morphlines : [ - { - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. 
(see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.kitesdk.morphline.stdlib.SetValuesBuilder", - "org.kitesdk.morphline.stdlib.CallParentPipeBuilder", - "org.kitesdk.morphline.stdlib.GenerateUUIDBuilder", - "org.kitesdk.morphline.stdlib.JavaBuilder", - "org.kitesdk.morphline.stdlib.TryRulesBuilder", - "org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder", - "org.kitesdk.morphline.stdio.ReadCSVBuilder", - "org.kitesdk.morphline.avro.ReadAvroContainerBuilder", - "org.kitesdk.morphline.avro.ExtractAvroPathsBuilder", - "org.kitesdk.morphline.avro.ExtractAvroTreeBuilder", - "org.kitesdk.morphline.tika.DetectMimeTypeBuilder", - "org.kitesdk.morphline.tika.decompress.DecompressBuilder", - "org.kitesdk.morphline.tika.decompress.UnpackBuilder", - "org.kitesdk.morphline.twitter.ReadJsonTestTweetsBuilder", - "org.apache.solr.morphlines.cell.SolrCellBuilder", - "org.apache.solr.morphlines.solr.SanitizeUnknownSolrFieldsBuilder", - "org.apache.solr.morphlines.solr.GenerateSolrSequenceKeyBuilder", - "org.apache.solr.morphlines.solr.LoadSolrBuilder"] - - commands : [ - { separateAttachments {} } - - # java command that doesn't do anything except for test compilation - { - java { - imports : "import java.util.*;" - code: """ - List tags = record.get("javaWithImports"); - return child.process(record); - """ - } - } - - # java command that doesn't do anything except for test compilation - { - java { - code: """ - List tags = record.get("javaWithoutImports"); - return child.process(record); - """ - } - } - - { - # used for auto-detection if MIME type isn't explicitly supplied - detectMimeType { - includeDefaultMimeTypes : true - mimeTypesFiles : ["RESOURCES_DIR/custom-mimetypes.xml"] - } - } - - { - tryRules { - throwExceptionIfAllRulesFailed : true - rules : [ - # next top-level rule: - { - commands : [ - { logDebug { format : "hello unpack" } } - { unpack {} } - { generateUUID {} } - { callParentPipe {} } - ] - } - - { - commands : [ - { 
logDebug { format : "hello decompress" } } - { decompress {} } - { callParentPipe {} } - ] - } - - { - commands : [ - { - readCSV { - supportedMimeTypes : [text/csv] - charset : UTF-8 - ignoreFirstLine : false - columns : [ user_screen_name, text ] - } - } - - { - generateUUID { - field : id - preserveExisting : false - } - } - - { - sanitizeUnknownSolrFields { - solrLocator : ${SOLR_LOCATOR} - } - } - ] - } - - { - commands : [ - { - readAvroContainer { - supportedMimeTypes : [avro/binary] - # readerSchemaString : "" # optional, avro json schema blurb for getSchema() - # readerSchemaFile : /path/to/syslog.avsc - } - } - - { extractAvroTree {} } - - { - setValues { - id : "@{/id}" - user_screen_name : "@{/user_screen_name}" - text : "@{/text}" - } - } - - { - sanitizeUnknownSolrFields { - solrLocator : ${SOLR_LOCATOR} - } - } - ] - } - - { - commands : [ - { - readJsonTestTweets { - supportedMimeTypes : ["mytwittertest/json+delimited+length"] - } - } - - { - sanitizeUnknownSolrFields { - solrLocator : ${SOLR_LOCATOR} - } - } - ] - } - - # next top-level rule: - { - commands : [ - { logDebug { format : "hello solrcell" } } - { - # wrap SolrCell around an HTML Tika parser - solrCell { - solrLocator : ${SOLR_LOCATOR} - # captureAttr : true # default is false - capture : [ - - # twitter feed schema - user_friends_count - user_location - user_description - user_statuses_count - user_followers_count - user_name - user_screen_name - created_at - text - retweet_count - retweeted - in_reply_to_user_id - source - in_reply_to_status_id - media_url_https - expanded_url - - # file metadata - file_download_url - file_upload_url - file_scheme - file_host - file_port - file_path - file_name - file_length - file_last_modified - file_owner - file_group - file_permissions_user - file_permissions_group - file_permissions_other - file_permissions_stickybit - ] - - fmap : { content : text, content-type : content_type } # rename "content" field to "text" fields - dateFormats : [ 
"yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat - # xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" - uprefix : "ignored_" - lowernames : true - # solrContentHandlerFactory : org.apache.solr.tika.TrimSolrContentHandlerFactory - - # Tika parsers to be registered. If multiple parsers support the same MIME type, - # the parser is chosen that is closest to the bottom in this list: - parsers : [ - { parser : org.apache.tika.parser.asm.ClassParser } - # { parser : org.apache.tika.parser.AutoDetectParser } - # { parser : org.gagravarr.tika.OggParser, additionalSupportedMimeTypes : [audio/ogg] } - { parser : org.gagravarr.tika.FlacParser } - { parser : org.apache.tika.parser.audio.AudioParser } - { parser : org.apache.tika.parser.audio.MidiParser } - { parser : org.apache.tika.parser.crypto.Pkcs7Parser } - { parser : org.apache.tika.parser.dwg.DWGParser } - { parser : org.apache.tika.parser.epub.EpubParser } - { parser : org.apache.tika.parser.executable.ExecutableParser } - { parser : org.apache.tika.parser.feed.FeedParser } - { parser : org.apache.tika.parser.font.AdobeFontMetricParser } - { parser : org.apache.tika.parser.font.TrueTypeParser } - { parser : org.apache.tika.parser.xml.XMLParser } - { parser : org.apache.tika.parser.html.HtmlParser } - { parser : org.apache.tika.parser.image.ImageParser } - { parser : org.apache.tika.parser.image.PSDParser } - { parser : org.apache.tika.parser.image.TiffParser } - { parser : org.apache.tika.parser.iptc.IptcAnpaParser } - { parser : org.apache.tika.parser.iwork.IWorkPackageParser } - { parser : org.apache.tika.parser.jpeg.JpegParser } - { parser : org.apache.tika.parser.mail.RFC822Parser } - { parser : org.apache.tika.parser.mbox.MboxParser, additionalSupportedMimeTypes : [message/x-emlx] } - { parser : org.apache.tika.parser.microsoft.OfficeParser } - { parser : org.apache.tika.parser.microsoft.TNEFParser } - { parser : org.apache.tika.parser.microsoft.ooxml.OOXMLParser } - { parser 
: org.apache.tika.parser.mp3.Mp3Parser } - { parser : org.apache.tika.parser.mp4.MP4Parser } - { parser : org.apache.tika.parser.hdf.HDFParser } - { parser : org.apache.tika.parser.netcdf.NetCDFParser } - { parser : org.apache.tika.parser.odf.OpenDocumentParser } - { parser : org.apache.tika.parser.pdf.PDFParser } - { parser : org.apache.tika.parser.pkg.CompressorParser } - { parser : org.apache.tika.parser.pkg.PackageParser } - { parser : org.apache.tika.parser.rtf.RTFParser } - { parser : org.apache.tika.parser.txt.TXTParser } - { parser : org.apache.tika.parser.video.FLVParser } - { parser : org.apache.tika.parser.xml.DcXMLParser } - { parser : org.apache.tika.parser.xml.FictionBookParser } - { parser : org.apache.tika.parser.chm.ChmParser } - #{ parser : org.apache.tika.parser.AutoDetectParser } - ] - } - } - - { generateUUID { field : ignored_base_id } } - - { - generateSolrSequenceKey { - baseIdField: ignored_base_id - solrLocator : ${SOLR_LOCATOR} - } - } - - ] - } - ] - } - } - - { - loadSolr { - solrLocator : ${SOLR_LOCATOR} - } - } - - { - logDebug { - format : "My output record: {}" - args : ["@{}"] - } - } - - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellJPGCompressed.conf ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellJPGCompressed.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellJPGCompressed.conf deleted file mode 100644 index 85cb2a7..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellJPGCompressed.conf +++ /dev/null @@ -1,150 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. 
-# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Application configuration file in HOCON format (Human-Optimized Config Object Notation). -# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md -# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). -# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html - -# morphline.conf example file -# this is a comment -// this is yet another comment - -morphlines : [ - { - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. 
(see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.kitesdk.morphline.stdlib.CallParentPipeBuilder", - "org.kitesdk.morphline.stdlib.GenerateUUIDBuilder", - "org.kitesdk.morphline.stdlib.JavaBuilder", - "org.kitesdk.morphline.stdlib.TryRulesBuilder", - "org.kitesdk.morphline.stdlib.SeparateAttachmentsBuilder", - "org.kitesdk.morphline.tika.DetectMimeTypeBuilder", - "org.kitesdk.morphline.tika.decompress.DecompressBuilder", - "org.kitesdk.morphline.tika.decompress.UnpackBuilder", - "org.apache.solr.morphlines.cell.SolrCellBuilder", - "org.apache.solr.morphlines.solr.GenerateSolrSequenceKeyBuilder", - "org.apache.solr.morphlines.solr.LoadSolrBuilder"] - - - commands : [ - { separateAttachments {} } - - # java command that doesn't do anything except for test compilation - { - java { - imports : "import java.util.*;" - code: """ - List tags = record.get("javaWithImports"); - return child.process(record); - """ - } - } - - # java command that doesn't do anything except for test compilation - { - java { - code: """ - List tags = record.get("javaWithoutImports"); - return child.process(record); - """ - } - } - - { - # auto-detect MIME type if it isn't explicitly supplied - detectMimeType { - includeDefaultMimeTypes : true - } - } - - { - tryRules { - throwExceptionIfAllRulesFailed : true - rules : [ - # next top-level rule: - { - commands : [ - { logDebug { format : "hello unpack" } } - { unpack {} } - { callParentPipe {} } - ] - } - - { - commands : [ - { logDebug { format : "hello decompress" } } - { decompress {} } - { callParentPipe {} } - ] - } - - # next top-level rule: - { - commands : [ - { logDebug { format : "hello solrcell" } } - { - # wrap SolrCell around a JPG Tika parser - solrCell { - solrLocator : ${SOLR_LOCATOR} - captureAttr : true # default is false - capture : [content, a, h1, h2] # extract some fields - fmap : { exif_image_height : text, a : anchor, h1 : heading1 } # rename some fields - dateFormats : [ 
"yyyy-MM-dd'T'HH:mm:ss", "yyyy-MM-dd"] # various java.text.SimpleDateFormat - xpath : "/xhtml:html/xhtml:body/xhtml:div/descendant:node()" - uprefix : "ignored_" - lowernames : true - solrContentHandlerFactory : org.apache.solr.morphlines.cell.TrimSolrContentHandlerFactory - parsers : [ # nested Tika parsers - { parser : org.apache.tika.parser.jpeg.JpegParser } - ] - } - } - - { logDebug { format : "solrcell output: {}", args : ["@{}"] } } - ] - } - ] - } - } - - { generateUUID { field : ignored_base_id } } - - { - generateSolrSequenceKey { - baseIdField: ignored_base_id - solrLocator : ${SOLR_LOCATOR} - } - } - - { - loadSolr { - solrLocator : ${SOLR_LOCATOR} - } - } - - { - logDebug { - format : "My output record: {}" - args : ["@{}"] - } - } - - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellXML.conf ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellXML.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellXML.conf deleted file mode 100644 index 9e840a9..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/solrCellXML.conf +++ /dev/null @@ -1,78 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# Application configuration file in HOCON format (Human-Optimized Config Object Notation). -# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md -# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). -# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html - -# morphline.conf example file -# this is a comment -// this is yet another comment - -morphlines : [ - { - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. (see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.kitesdk.morphline.stdlib.AddValuesBuilder", - "org.apache.solr.morphlines.cell.SolrCellBuilder", - "org.apache.solr.morphlines.solr.GenerateSolrSequenceKeyBuilder", - "org.apache.solr.morphlines.solr.SanitizeUnknownSolrFieldsBuilder", - "org.apache.solr.morphlines.solr.LoadSolrBuilder"] - - - commands : [ - { - addValues { _attachment_mimetype : application/xml } - # alternatively, consider using the detectMimeType command - } - - { - # wrap SolrCell around an XML Tika parser - solrCell { - solrLocator : ${SOLR_LOCATOR} - parsers : [ # nested Tika parsers - { parser : org.apache.tika.parser.xml.XMLParser } - ] - } - } - - { - generateSolrSequenceKey { - baseIdField: base_id - solrLocator : ${SOLR_LOCATOR} - } - } - - { - sanitizeUnknownSolrFields { - solrLocator : ${SOLR_LOCATOR} - } - } - - { logDebug { format : "solrcell output: {}", args : ["@{}"] } } - - { - loadSolr { - solrLocator : ${SOLR_LOCATOR} - } - } - - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/tokenizeText.conf 
---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/tokenizeText.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/tokenizeText.conf deleted file mode 100644 index d9354c4..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/tokenizeText.conf +++ /dev/null @@ -1,38 +0,0 @@ -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -morphlines : [ - { - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. 
(see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.apache.solr.morphlines.solr.TokenizeTextBuilder"] - - commands : [ - { - tokenizeText { - inputField : message - outputField : tokens - solrFieldType : text_en - solrLocator : ${SOLR_LOCATOR} - } - } - - { logDebug { format : "output record {}", args : ["@{}"] } } - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/test-morphlines/tutorialReadAvroContainer.conf ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test-files/test-morphlines/tutorialReadAvroContainer.conf b/solr/contrib/morphlines-core/src/test-files/test-morphlines/tutorialReadAvroContainer.conf deleted file mode 100644 index eee4ba5..0000000 --- a/solr/contrib/morphlines-core/src/test-files/test-morphlines/tutorialReadAvroContainer.conf +++ /dev/null @@ -1,145 +0,0 @@ -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Application configuration file in HOCON format (Human-Optimized Config Object Notation). -# HOCON syntax is defined at http://github.com/typesafehub/config/blob/master/HOCON.md -# and also used by Akka (http://www.akka.io) and Play (http://www.playframework.org/). 
-# For more examples see http://doc.akka.io/docs/akka/2.1.2/general/configuration.html - -# morphline.conf example file -# this is a comment - -# Specify server locations in a SOLR_LOCATOR variable; used later in variable substitutions: -SOLR_LOCATOR : { - # Name of solr collection - collection : collection1 - - # ZooKeeper ensemble - zkHost : "127.0.0.1:2181/solr" - - # The maximum number of documents to send to Solr per network batch (throughput knob) - # batchSize : 1000 -} - -# Specify an array of one or more morphlines, each of which defines an ETL -# transformation chain. A morphline consists of one or more (potentially -# nested) commands. A morphline is a way to consume records (e.g. Flume events, -# HDFS files or blocks), turn them into a stream of records, and pipe the stream -# of records through a set of easily configurable transformations on its way to -# Solr. -morphlines : [ - { - # Name used to identify a morphline. E.g. used if there are multiple morphlines in a - # morphline config file - id : morphline1 - - # using globs (foo.bar.* or foo.bar.**) will not work in Java9 due to classpath scanning limitations - # so we enumerate every command (builder) we know this config uses below. 
(see SOLR-8876) - importCommands : ["org.kitesdk.morphline.stdlib.LogDebugBuilder", - "org.kitesdk.morphline.avro.ReadAvroContainerBuilder", - "org.kitesdk.morphline.avro.ExtractAvroPathsBuilder", - "org.kitesdk.morphline.stdlib.ConvertTimestampBuilder", - "org.apache.solr.morphlines.solr.SanitizeUnknownSolrFieldsBuilder", - "org.apache.solr.morphlines.solr.LoadSolrBuilder"] - - commands : [ - { - # Parse Avro container file and emit a record for each avro object - readAvroContainer { - # Optionally, require the input record to match one of these MIME types: - # supportedMimeTypes : [avro/binary] - - # Optionally, use a custom Avro schema in JSON format inline: - # readerSchemaString : """""" - - # Optionally, use a custom Avro schema file in JSON format: - # readerSchemaFile : /path/to/syslog.avsc - } - } - - { - # Consume the output record of the previous command and pipe another record downstream. - # - # extractAvroPaths is a command that uses zero or more avro path expressions to extract - # values from an Avro object. Each expression consists of a record output field name (on - # the left side of the colon ':') as well as zero or more path steps (on the right hand - # side), each path step separated by a '/' slash. Avro arrays are traversed with the '[]' - # notation. - # - # The result of a path expression is a list of objects, each of which is added to the - # given record output field. - # - # The path language supports all Avro concepts, including nested structures, records, - # arrays, maps, unions, etc, as well as a flatten option that collects the primitives in - # a subtree into a flat list. 
- extractAvroPaths { - flatten : false - paths : { - id : /id - text : /text - user_friends_count : /user_friends_count - user_location : /user_location - user_description : /user_description - user_statuses_count : /user_statuses_count - user_followers_count : /user_followers_count - user_name : /user_name - user_screen_name : /user_screen_name - created_at : /created_at - retweet_count : /retweet_count - retweeted : /retweeted - in_reply_to_user_id : /in_reply_to_user_id - source : /source - in_reply_to_status_id : /in_reply_to_status_id - media_url_https : /media_url_https - expanded_url : /expanded_url - } - } - } - - # Consume the output record of the previous command and pipe another record downstream. - # - # convert timestamp field to native Solr timestamp format - # e.g. 2012-09-06T07:14:34Z to 2012-09-06T07:14:34.000Z - { - convertTimestamp { - field : created_at - inputFormats : ["yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd"] - inputTimezone : UTC -# outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSSZ" - outputTimezone : America/Los_Angeles - } - } - - # Consume the output record of the previous command and pipe another record downstream. - # - # This command sanitizes record fields that are unknown to Solr schema.xml by deleting - # them. Recall that Solr throws an exception on any attempt to load a document that - # contains a field that isn't specified in schema.xml. - { - sanitizeUnknownSolrFields { - # Location from which to fetch Solr schema - solrLocator : ${SOLR_LOCATOR} - } - } - - # log the record at DEBUG level to SLF4J - { logDebug { format : "output record: {}", args : ["@{}"] } } - - # load the record into a Solr server or MapReduce Reducer. 
- { - loadSolr { - solrLocator : ${SOLR_LOCATOR} - } - } - ] - } -] http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java deleted file mode 100644 index c91f31b..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java +++ /dev/null @@ -1,318 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.morphlines.solr; - -import com.codahale.metrics.MetricRegistry; -import com.google.common.io.Files; -import com.typesafe.config.Config; -import org.apache.commons.io.FileUtils; -import org.apache.solr.SolrTestCaseJ4; -import org.apache.solr.client.solrj.SolrClient; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.impl.HttpSolrClient; -import org.apache.solr.client.solrj.impl.XMLResponseParser; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.kitesdk.morphline.api.Collector; -import org.kitesdk.morphline.api.Command; -import org.kitesdk.morphline.api.MorphlineContext; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Compiler; -import org.kitesdk.morphline.base.FaultTolerance; -import org.kitesdk.morphline.base.Fields; -import org.kitesdk.morphline.base.Notifications; -import org.kitesdk.morphline.stdlib.PipeBuilder; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.lang.invoke.MethodHandles; -import java.util.Arrays; -import java.util.Calendar; -import java.util.Collection; -import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Locale; -import java.util.Map; -import java.util.Map.Entry; -import java.util.TimeZone; -import java.util.concurrent.atomic.AtomicInteger; - -public class AbstractSolrMorphlineTestBase extends SolrTestCaseJ4 { - private static Locale savedLocale; - protected Collector collector; - protected Command morphline; - protected SolrClient solrClient; - protected DocumentLoader testServer; - - protected static final boolean TEST_WITH_EMBEDDED_SOLR_SERVER = true; - 
protected static final String EXTERNAL_SOLR_SERVER_URL = System.getProperty("externalSolrServer"); -// protected static final String EXTERNAL_SOLR_SERVER_URL = "http://127.0.0.1:8983/solr"; - - protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent(); - protected static final String DEFAULT_BASE_DIR = "solr"; - protected static final AtomicInteger SEQ_NUM = new AtomicInteger(); - protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger(); - - protected static final Object NON_EMPTY_FIELD = new Object(); - - private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - protected String tempDir; - - @BeforeClass - public static void beforeClass() throws Exception { - - // TODO: test doesn't work with some Locales, see SOLR-6458 - savedLocale = Locale.getDefault(); - Locale.setDefault(Locale.ENGLISH); - - // we leave this in case the above is addressed - assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)", - new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage())); - - myInitCore(DEFAULT_BASE_DIR); - } - - @AfterClass - public static void afterClass() throws Exception { - if (savedLocale != null) { - Locale.setDefault(savedLocale); - } - savedLocale = null; - } - - protected static void myInitCore(String baseDirName) throws Exception { - String solrHome = RESOURCES_DIR + File.separator + baseDirName; - initCore( - "solrconfig.xml", "schema.xml", solrHome - ); - } - - @Before - public void setUp() throws Exception { - super.setUp(); - collector = new Collector(); - - if (EXTERNAL_SOLR_SERVER_URL != null) { - //solrServer = new ConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); - //solrServer = new SafeConcurrentUpdateSolrServer(EXTERNAL_SOLR_SERVER_URL, 2, 2); - solrClient = getHttpSolrClient(EXTERNAL_SOLR_SERVER_URL); - ((HttpSolrClient) solrClient).setParser(new XMLResponseParser()); - } else { 
- if (TEST_WITH_EMBEDDED_SOLR_SERVER) { - solrClient = new EmbeddedTestSolrServer(h.getCoreContainer(), DEFAULT_TEST_CORENAME); - } else { - throw new RuntimeException("Not yet implemented"); - //solrServer = new TestSolrServer(getSolrClient()); - } - } - - int batchSize = SEQ_NUM2.incrementAndGet() % 2 == 0 ? 100 : 1; //SolrInspector.DEFAULT_SOLR_SERVER_BATCH_SIZE : 1; - testServer = new SolrClientDocumentLoader(solrClient, batchSize); - deleteAllDocuments(); - - tempDir = createTempDir().toFile().getAbsolutePath(); - } - - @After - public void tearDown() throws Exception { - collector = null; - solrClient.close(); - solrClient = null; - super.tearDown(); - } - - protected void testDocumentTypesInternal( - String[] files, - Map expectedRecords, - Map> expectedRecordContents) throws Exception { - - assumeTrue("This test has issues with this locale: https://issues.apache.org/jira/browse/SOLR-5778", - "GregorianCalendar".equals(Calendar.getInstance(TimeZone.getDefault(), Locale.getDefault()).getClass().getSimpleName())); - deleteAllDocuments(); - int numDocs = 0; - for (int i = 0; i < 1; i++) { - - for (String file : files) { - File f = new File(file); - byte[] body = Files.toByteArray(f); - Record event = new Record(); - //event.put(Fields.ID, docId++); - event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body)); - event.getFields().put(Fields.ATTACHMENT_NAME, f.getName()); - event.getFields().put(Fields.BASE_ID, f.getName()); - collector.reset(); - load(event); - Integer count = expectedRecords.get(file); - if (count != null) { - numDocs += count; - } else { - numDocs++; - } - assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*")); - Map expectedContents = expectedRecordContents.get(file); - if (expectedContents != null) { - Record actual = collector.getFirstRecord(); - for (Map.Entry entry : expectedContents.entrySet()) { - if (entry.getValue() == NON_EMPTY_FIELD) { - assertNotNull(entry.getKey()); - 
assertTrue(actual.getFirstValue(entry.getKey()).toString().length() > 0); - } else if (entry.getValue() == null) { - assertEquals("key:" + entry.getKey(), 0, actual.get(entry.getKey()).size()); - } else { - assertEquals("key:" + entry.getKey(), Arrays.asList(entry.getValue()), actual.get(entry.getKey())); - } - } - } - } - } - assertEquals(numDocs, queryResultSetSize("*:*")); - } - - private boolean load(Record record) { - Notifications.notifyStartSession(morphline); - return morphline.process(record); - } - - protected int queryResultSetSize(String query) { -// return collector.getRecords().size(); - try { - testServer.commitTransaction(); - solrClient.commit(false, true, true); - QueryResponse rsp = solrClient.query(new SolrQuery(query).setRows(Integer.MAX_VALUE)); - LOGGER.debug("rsp: {}", rsp); - int i = 0; - for (SolrDocument doc : rsp.getResults()) { - LOGGER.debug("rspDoc #{}: {}", i++, doc); - } - int size = rsp.getResults().size(); - return size; - } catch (Exception e) { - throw new RuntimeException(e); - } - } - - private void deleteAllDocuments() throws SolrServerException, IOException { - collector.reset(); - SolrClient s = solrClient; - s.deleteByQuery("*:*"); // delete everything! 
- s.commit(); - } - - protected Command createMorphline(String file) throws IOException { - return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext()); - } - - private MorphlineContext createMorphlineContext() { - return new SolrMorphlineContext.Builder() - .setDocumentLoader(testServer) -// .setDocumentLoader(new CollectingDocumentLoader(100)) - .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) - .setMetricRegistry(new MetricRegistry()) - .build(); - } - - private Config parse(String file) throws IOException { - SolrLocator locator = new SolrLocator(createMorphlineContext()); - locator.setSolrHomeDir(testSolrHome + "/collection1"); - File morphlineFile; - if (new File(file).isAbsolute()) { - morphlineFile = new File(file + ".conf"); - } else { - morphlineFile = new File(RESOURCES_DIR + "/" + file + ".conf"); - } - Config config = new Compiler().parse(morphlineFile, locator.toConfig("SOLR_LOCATOR")); - config = config.getConfigList("morphlines").get(0); - return config; - } - - protected void startSession() { - Notifications.notifyStartSession(morphline); - } - - protected void testDocumentContent(HashMap expectedResultMap) - throws Exception { - QueryResponse rsp = solrClient.query(new SolrQuery("*:*").setRows(Integer.MAX_VALUE)); - // Check that every expected field/values shows up in the actual query - for (Entry current : expectedResultMap.entrySet()) { - String field = current.getKey(); - for (String expectedFieldValue : current.getValue().getFieldValues()) { - ExpectedResult.CompareType compareType = current.getValue().getCompareType(); - boolean foundField = false; - - for (SolrDocument doc : rsp.getResults()) { - Collection actualFieldValues = doc.getFieldValues(field); - if (compareType == ExpectedResult.CompareType.equals) { - if (actualFieldValues != null && actualFieldValues.contains(expectedFieldValue)) { - foundField = true; - break; - } - } - else { - for (Iterator it = 
actualFieldValues.iterator(); it.hasNext(); ) { - String actualValue = it.next().toString(); // test only supports string comparison - if (actualFieldValues != null && actualValue.contains(expectedFieldValue)) { - foundField = true; - break; - } - } - } - } - assert(foundField); // didn't find expected field/value in query - } - } - } - - /** - * Representation of the expected output of a SolrQuery. - */ - protected static class ExpectedResult { - private HashSet fieldValues; - public enum CompareType { - equals, // Compare with equals, i.e. actual.equals(expected) - contains; // Compare with contains, i.e. actual.contains(expected) - } - private CompareType compareType; - - public ExpectedResult(HashSet fieldValues, CompareType compareType) { - this.fieldValues = fieldValues; - this.compareType = compareType; - } - public HashSet getFieldValues() { return fieldValues; } - public CompareType getCompareType() { return compareType; } - } - - public static void setupMorphline(String tempDir, String file, boolean replaceSolrLocator) throws IOException { - String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8"); - morphlineText = morphlineText.replace("RESOURCES_DIR", new File(tempDir).getAbsolutePath()); - if (replaceSolrLocator) { - morphlineText = morphlineText.replace("${SOLR_LOCATOR}", - "{ collection : collection1 }"); - } - new File(tempDir + "/" + file + ".conf").getParentFile().mkdirs(); - FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8"); - } -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java 
b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java deleted file mode 100644 index 31e7ebf..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineZkTestBase.java +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.morphlines.solr; - -import java.io.File; -import java.io.IOException; -import java.util.Iterator; -import java.util.Locale; - -import com.codahale.metrics.MetricRegistry; -import com.google.common.collect.ListMultimap; -import com.typesafe.config.Config; -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.cloud.AbstractDistribZkTestBase; -import org.apache.solr.cloud.SolrCloudTestCase; -import org.apache.solr.common.SolrDocument; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.kitesdk.morphline.api.Collector; -import org.kitesdk.morphline.api.Command; -import org.kitesdk.morphline.api.MorphlineContext; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Compiler; -import org.kitesdk.morphline.base.FaultTolerance; -import org.kitesdk.morphline.base.Notifications; -import org.kitesdk.morphline.stdlib.PipeBuilder; - -public abstract class AbstractSolrMorphlineZkTestBase extends SolrCloudTestCase { - - protected static final String COLLECTION = "collection1"; - - protected static final int TIMEOUT = 30; - - @BeforeClass - public static void setupCluster() throws Exception { - // set some system properties for use by tests - System.setProperty("solr.test.sys.prop1", "propone"); - System.setProperty("solr.test.sys.prop2", "proptwo"); - - configureCluster(2) - .addConfig("conf", SOLR_CONF_DIR.toPath()) - .configure(); - - CollectionAdminRequest.createCollection(COLLECTION, "conf", 2, 1) - .processAndWait(cluster.getSolrClient(), TIMEOUT); - AbstractDistribZkTestBase.waitForRecoveriesToFinish(COLLECTION, cluster.getSolrClient().getZkStateReader(), - false, true, TIMEOUT); - } - - @AfterClass - public static void afterClass() { - System.clearProperty("solr.test.sys.prop1"); - System.clearProperty("solr.test.sys.prop2"); - } - - protected static final String RESOURCES_DIR = 
getFile("morphlines-core.marker").getParent(); - private static final File SOLR_CONF_DIR = new File(RESOURCES_DIR + "/solr/collection1/conf"); - - protected Collector collector; - protected Command morphline; - - @BeforeClass - public static void setupClass() throws Exception { - - assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)", - new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage())); - - } - - @Before - public void setup() throws Exception { - collector = new Collector(); - cluster.waitForAllNodes(DEFAULT_TIMEOUT); - } - - protected void commit() throws Exception { - Notifications.notifyCommitTransaction(morphline); - } - - protected Command parse(String file) throws IOException { - return parse(file, COLLECTION); - } - - protected Command parse(String file, String collection) throws IOException { - SolrLocator locator = new SolrLocator(createMorphlineContext()); - locator.setCollectionName(collection); - locator.setZkHost(cluster.getZkServer().getZkAddress()); - //locator.setServerUrl(cloudJettys.get(0).url); // TODO: download IndexSchema from solrUrl not yet implemented - //locator.setSolrHomeDir(SOLR_HOME_DIR.getPath()); - Config config = new Compiler().parse(new File(RESOURCES_DIR + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR")); - config = config.getConfigList("morphlines").get(0); - return createMorphline(config); - } - - private Command createMorphline(Config config) { - return new PipeBuilder().build(config, null, collector, createMorphlineContext()); - } - - private MorphlineContext createMorphlineContext() { - return new MorphlineContext.Builder() - .setExceptionHandler(new FaultTolerance(false, false, SolrServerException.class.getName())) - .setMetricRegistry(new MetricRegistry()) - .build(); - } - - protected void startSession() { - Notifications.notifyStartSession(morphline); - } - - protected ListMultimap next(Iterator iter) { - SolrDocument doc = 
iter.next(); - Record record = toRecord(doc); - record.removeAll("_version_"); // the values of this field are unknown and internal to solr - return record.getFields(); - } - - private Record toRecord(SolrDocument doc) { - Record record = new Record(); - for (String key : doc.keySet()) { - record.getFields().replaceValues(key, doc.getFieldValues(key)); - } - return record; - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java deleted file mode 100644 index 3864e6d..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/CollectingDocumentLoader.java +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.morphlines.solr; - -import java.lang.invoke.MethodHandles; -import java.util.ArrayList; -import java.util.List; - -import org.apache.solr.client.solrj.response.SolrPingResponse; -import org.apache.solr.client.solrj.response.UpdateResponse; -import org.apache.solr.common.SolrInputDocument; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -/** - * A mockup DocumentLoader implementation for unit tests; collects all documents into a main memory list. - */ -class CollectingDocumentLoader implements DocumentLoader { - - private final int batchSize; - private final List batch = new ArrayList<> (); - private List results = new ArrayList<> (); - - private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - - public CollectingDocumentLoader(int batchSize) { - if (batchSize <= 0) { - throw new IllegalArgumentException("batchSize must be a positive number: " + batchSize); - } - this.batchSize = batchSize; - } - - @Override - public void beginTransaction() { - LOGGER.trace("beginTransaction"); - batch.clear(); - } - - @Override - public void load(SolrInputDocument doc) { - LOGGER.trace("load doc: {}", doc); - batch.add(doc); - if (batch.size() >= batchSize) { - loadBatch(); - } - } - - @Override - public void commitTransaction() { - LOGGER.trace("commitTransaction"); - if (batch.size() > 0) { - loadBatch(); - } - } - - private void loadBatch() { - try { - results.addAll(batch); - } finally { - batch.clear(); - } - } - - @Override - public UpdateResponse rollbackTransaction() { - LOGGER.trace("rollback"); - return new UpdateResponse(); - } - - @Override - public void shutdown() { - LOGGER.trace("shutdown"); - } - - @Override - public SolrPingResponse ping() { - LOGGER.trace("ping"); - return new SolrPingResponse(); - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java 
---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java deleted file mode 100644 index 9599511..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/EmbeddedTestSolrServer.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.morphlines.solr; - -import org.apache.solr.client.solrj.SolrServerException; -import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer; -import org.apache.solr.client.solrj.response.UpdateResponse; -import org.apache.solr.core.CoreContainer; - -import java.io.IOException; - -/** - * An EmbeddedSolrServer that supresses close and rollback requests as - * necessary for testing - */ -public class EmbeddedTestSolrServer extends EmbeddedSolrServer { - - public EmbeddedTestSolrServer(CoreContainer coreContainer, String coreName) { - super(coreContainer, coreName); - } - - @Override - public void close() { - ; // NOP - } - - @Override - public UpdateResponse rollback() throws SolrServerException, IOException { - return new UpdateResponse(); - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java deleted file mode 100644 index 64fa2ec..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.morphlines.solr; - -import org.junit.Test; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Fields; -import org.kitesdk.morphline.base.Notifications; - -import java.io.File; -import java.util.Arrays; - -public class SolrMorphlineTest extends AbstractSolrMorphlineTestBase { - - @Test - public void testLoadSolrBasic() throws Exception { - //System.setProperty("ENV_SOLR_HOME", testSolrHome + "/collection1"); - morphline = createMorphline("test-morphlines/loadSolrBasic"); - //System.clearProperty("ENV_SOLR_HOME"); - Record record = new Record(); - record.put(Fields.ID, "id0"); - record.put("first_name", "Nadja"); // will be sanitized - startSession(); - Notifications.notifyBeginTransaction(morphline); - assertTrue(morphline.process(record)); - assertEquals(1, collector.getNumStartEvents()); - Notifications.notifyCommitTransaction(morphline); - Record expected = new Record(); - expected.put(Fields.ID, "id0"); - assertEquals(Arrays.asList(expected), collector.getRecords()); - assertEquals(1, queryResultSetSize("*:*")); - Notifications.notifyRollbackTransaction(morphline); - Notifications.notifyShutdown(morphline); - } - - @Test - public void testTokenizeText() throws Exception { - morphline = createMorphline("test-morphlines" + File.separator + "tokenizeText"); - for (int i = 0; i < 3; i++) { - Record record = new Record(); - record.put(Fields.MESSAGE, "Hello World!"); - record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123"); - Record expected = record.copy(); - expected.getFields().putAll("tokens", 
Arrays.asList("hello", "world", "foo", "bar.com", "123")); - collector.reset(); - startSession(); - Notifications.notifyBeginTransaction(morphline); - assertTrue(morphline.process(record)); - assertEquals(1, collector.getNumStartEvents()); - Notifications.notifyCommitTransaction(morphline); - assertEquals(expected, collector.getFirstRecord()); - } - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java deleted file mode 100644 index 74c8824..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAliasTest.java +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.solr.morphlines.solr; - -import java.io.File; -import java.util.Iterator; - -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.request.CollectionAdminRequest; -import org.apache.solr.client.solrj.request.UpdateRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.util.BadHdfsThreadsFilter; -import org.junit.Test; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Fields; -import org.kitesdk.morphline.base.Notifications; - -@ThreadLeakFilters(defaultFilters = true, filters = { - BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) -}) -@Slow -public class SolrMorphlineZkAliasTest extends AbstractSolrMorphlineZkTestBase { - - @Test - public void test() throws Exception { - - CollectionAdminRequest.createAlias("aliascollection", "collection1") - .process(cluster.getSolrClient()); - - morphline = parse("test-morphlines" + File.separator + "loadSolrBasic", "aliascollection"); - Record record = new Record(); - record.put(Fields.ID, "id0-innsbruck"); - record.put("text", "mytext"); - record.put("user_screen_name", "foo"); - record.put("first_name", "Nadja"); // will be sanitized - startSession(); - assertEquals(1, collector.getNumStartEvents()); - Notifications.notifyBeginTransaction(morphline); - assertTrue(morphline.process(record)); - - record = new Record(); - record.put(Fields.ID, "id1-innsbruck"); - record.put("text", "mytext1"); - record.put("user_screen_name", "foo1"); - record.put("first_name", "Nadja1"); // will be sanitized - assertTrue(morphline.process(record)); - - Record expected = new Record(); - expected.put(Fields.ID, "id0-innsbruck"); - expected.put("text", "mytext"); - expected.put("user_screen_name", "foo"); - Iterator citer = 
collector.getRecords().iterator(); - assertEquals(expected, citer.next()); - - Record expected2 = new Record(); - expected2.put(Fields.ID, "id1-innsbruck"); - expected2.put("text", "mytext1"); - expected2.put("user_screen_name", "foo1"); - assertEquals(expected2, citer.next()); - - assertFalse(citer.hasNext()); - - Notifications.notifyCommitTransaction(morphline); - new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); - - QueryResponse rsp = cluster.getSolrClient() - .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); - //System.out.println(rsp); - Iterator iter = rsp.getResults().iterator(); - assertEquals(expected.getFields(), next(iter)); - assertEquals(expected2.getFields(), next(iter)); - assertFalse(iter.hasNext()); - - Notifications.notifyRollbackTransaction(morphline); - Notifications.notifyShutdown(morphline); - - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java deleted file mode 100644 index 49e2a0e..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkAvroTest.java +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.morphlines.solr; - -import java.io.File; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.Paths; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; - -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import org.apache.avro.file.DataFileReader; -import org.apache.avro.file.FileReader; -import org.apache.avro.generic.GenericData; -import org.apache.avro.generic.GenericDatumReader; -import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.request.UpdateRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.util.BadHdfsThreadsFilter; -import org.junit.Test; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Fields; -import org.kitesdk.morphline.base.Notifications; - -@ThreadLeakFilters(defaultFilters = true, filters = { - BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) -}) -@Slow -public class SolrMorphlineZkAvroTest extends AbstractSolrMorphlineZkTestBase { - - - @Test - public void test() throws Exception { - - Path avro = Paths.get(RESOURCES_DIR).resolve("test-documents").resolve("sample-statuses-20120906-141433-medium.avro"); - - // load avro records via morphline and zk into solr - morphline = parse("test-morphlines" + File.separator + "tutorialReadAvroContainer"); - Record record = 
new Record(); - byte[] body = Files.readAllBytes(avro); - record.put(Fields.ATTACHMENT_BODY, body); - startSession(); - Notifications.notifyBeginTransaction(morphline); - assertTrue(morphline.process(record)); - assertEquals(1, collector.getNumStartEvents()); - - Notifications.notifyCommitTransaction(morphline); - new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); - - // fetch sorted result set from solr - QueryResponse rsp = cluster.getSolrClient() - .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort("id", SolrQuery.ORDER.asc)); - assertEquals(2104, collector.getRecords().size()); - assertEquals(collector.getRecords().size(), rsp.getResults().size()); - - Collections.sort(collector.getRecords(), (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString())); - - // fetch test input data and sort like solr result set - List records = new ArrayList<>(); - FileReader reader = new DataFileReader(avro.toFile(), new GenericDatumReader()); - while (reader.hasNext()) { - GenericData.Record expected = reader.next(); - records.add(expected); - } - assertEquals(collector.getRecords().size(), records.size()); - Collections.sort(records, (r1, r2) -> r1.get("id").toString().compareTo(r2.get("id").toString())); - - Object lastId = null; - for (int i = 0; i < records.size(); i++) { - //System.out.println("myrec" + i + ":" + records.get(i)); - Object id = records.get(i); - if (id != null && id.equals(lastId)) { - throw new IllegalStateException("Detected duplicate id. 
Test input data must not contain duplicate ids!"); - } - lastId = id; - } - - for (int i = 0; i < records.size(); i++) { - //System.out.println("myrsp" + i + ":" + rsp.getResults().get(i)); - } - - Iterator rspIter = rsp.getResults().iterator(); - for (int i = 0; i < records.size(); i++) { - // verify morphline spat out expected data - Record actual = collector.getRecords().get(i); - GenericData.Record expected = Objects.requireNonNull(records.get(i)); - assertTweetEquals(expected, actual, i); - - // verify Solr result set contains expected data - actual = new Record(); - actual.getFields().putAll(next(rspIter)); - assertTweetEquals(expected, actual, i); - } - - Notifications.notifyRollbackTransaction(morphline); - Notifications.notifyShutdown(morphline); - - } - - private void assertTweetEquals(GenericData.Record expected, Record actual, int i) { - Objects.requireNonNull(expected); - Objects.requireNonNull(actual); -// System.out.println("\n\nexpected: " + toString(expected)); -// System.out.println("actual: " + actual); - String[] fieldNames = new String[] { - "id", - "in_reply_to_status_id", - "in_reply_to_user_id", - "retweet_count", - "text", - }; - for (String fieldName : fieldNames) { - assertEquals( - i + " fieldName: " + fieldName, - expected.get(fieldName).toString(), - actual.getFirstValue(fieldName).toString()); - } - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java ---------------------------------------------------------------------- diff --git a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java b/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java deleted file mode 100644 index 24d8682..0000000 --- a/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineZkTest.java +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Licensed to the 
Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.solr.morphlines.solr; - -import java.io.File; -import java.util.Iterator; - -import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; -import org.apache.lucene.util.LuceneTestCase.Slow; -import org.apache.solr.client.solrj.SolrQuery; -import org.apache.solr.client.solrj.request.UpdateRequest; -import org.apache.solr.client.solrj.response.QueryResponse; -import org.apache.solr.common.SolrDocument; -import org.apache.solr.util.BadHdfsThreadsFilter; -import org.junit.Test; -import org.kitesdk.morphline.api.Record; -import org.kitesdk.morphline.base.Fields; -import org.kitesdk.morphline.base.Notifications; - -@ThreadLeakFilters(defaultFilters = true, filters = { - BadHdfsThreadsFilter.class // hdfs currently leaks thread(s) -}) -@Slow -public class SolrMorphlineZkTest extends AbstractSolrMorphlineZkTestBase { - - @Test - public void test() throws Exception { - - morphline = parse("test-morphlines" + File.separator + "loadSolrBasic"); - Record record = new Record(); - record.put(Fields.ID, "id0-innsbruck"); - record.put("text", "mytext"); - record.put("user_screen_name", "foo"); - record.put("first_name", "Nadja"); // will be sanitized - startSession(); - 
assertEquals(1, collector.getNumStartEvents()); - Notifications.notifyBeginTransaction(morphline); - assertTrue(morphline.process(record)); - - record = new Record(); - record.put(Fields.ID, "id1-innsbruck"); - record.put("text", "mytext1"); - record.put("user_screen_name", "foo1"); - record.put("first_name", "Nadja1"); // will be sanitized - assertTrue(morphline.process(record)); - - Record expected = new Record(); - expected.put(Fields.ID, "id0-innsbruck"); - expected.put("text", "mytext"); - expected.put("user_screen_name", "foo"); - Iterator citer = collector.getRecords().iterator(); - assertEquals(expected, citer.next()); - - Record expected2 = new Record(); - expected2.put(Fields.ID, "id1-innsbruck"); - expected2.put("text", "mytext1"); - expected2.put("user_screen_name", "foo1"); - assertEquals(expected2, citer.next()); - - assertFalse(citer.hasNext()); - - Notifications.notifyCommitTransaction(morphline); - new UpdateRequest().commit(cluster.getSolrClient(), COLLECTION); - - QueryResponse rsp = cluster.getSolrClient() - .query(COLLECTION, new SolrQuery("*:*").setRows(100000).addSort(Fields.ID, SolrQuery.ORDER.asc)); - //System.out.println(rsp); - Iterator iter = rsp.getResults().iterator(); - assertEquals(expected.getFields(), next(iter)); - assertEquals(expected2.getFields(), next(iter)); - assertFalse(iter.hasNext()); - - Notifications.notifyRollbackTransaction(morphline); - Notifications.notifyShutdown(morphline); - - } - -} http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/licenses/Saxon-HE-9.6.0-2.jar.sha1 ---------------------------------------------------------------------- diff --git a/solr/licenses/Saxon-HE-9.6.0-2.jar.sha1 b/solr/licenses/Saxon-HE-9.6.0-2.jar.sha1 deleted file mode 100644 index 56981f2..0000000 --- a/solr/licenses/Saxon-HE-9.6.0-2.jar.sha1 +++ /dev/null @@ -1 +0,0 @@ -826a6c32fef050346b37e91b36fde16cf932da12