Return-Path: X-Original-To: apmail-cxf-commits-archive@www.apache.org Delivered-To: apmail-cxf-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id DEB4511C4F for ; Fri, 15 Aug 2014 13:23:28 +0000 (UTC) Received: (qmail 29038 invoked by uid 500); 15 Aug 2014 13:23:28 -0000 Delivered-To: apmail-cxf-commits-archive@cxf.apache.org Received: (qmail 28971 invoked by uid 500); 15 Aug 2014 13:23:28 -0000 Mailing-List: contact commits-help@cxf.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@cxf.apache.org Delivered-To: mailing list commits@cxf.apache.org Received: (qmail 28956 invoked by uid 99); 15 Aug 2014 13:23:28 -0000 Received: from tyr.zones.apache.org (HELO tyr.zones.apache.org) (140.211.11.114) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 15 Aug 2014 13:23:28 +0000 Received: by tyr.zones.apache.org (Postfix, from userid 65534) id 50F669C4FF2; Fri, 15 Aug 2014 13:23:28 +0000 (UTC) Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: reta@apache.org To: commits@cxf.apache.org Date: Fri, 15 Aug 2014 13:23:28 -0000 Message-Id: <51b21817e5fc4ff8b361e52a42c76dc1@git.apache.org> X-Mailer: ASF-Git Admin Mailer Subject: [1/2] git commit: CXF-5923: Create sample to demonstrate usage of search extension with Lucene and Tika. Added duplicated documents handling. Repository: cxf Updated Branches: refs/heads/master 0f54c1947 -> e9d2aa0a6 CXF-5923: Create sample to demonstrate usage of search extension with Lucene and Tika. Added duplicated documents handling. Project: http://git-wip-us.apache.org/repos/asf/cxf/repo Commit: http://git-wip-us.apache.org/repos/asf/cxf/commit/4f70bdf4 Tree: http://git-wip-us.apache.org/repos/asf/cxf/tree/4f70bdf4 Diff: http://git-wip-us.apache.org/repos/asf/cxf/diff/4f70bdf4 Branch: refs/heads/master Commit: 4f70bdf4199a666829cf9c6c62c508d12afbe251 Parents: d3780ee Author: reta Authored: Fri Aug 15 09:22:40 2014 -0400 Committer: reta Committed: Fri Aug 15 09:22:40 2014 -0400 ---------------------------------------------------------------------- .../java/demo/jaxrs/search/client/Client.java | 3 ++ .../java/demo/jaxrs/search/server/Catalog.java | 34 ++++++++++++++++---- .../ext/search/tika/LuceneDocumentMetadata.java | 8 ++++- .../search/tika/TikaLuceneContentExtractor.java | 3 +- 4 files changed, 39 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/cxf/blob/4f70bdf4/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/client/Client.java ---------------------------------------------------------------------- diff --git a/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/client/Client.java b/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/client/Client.java index bd62180..671755b 100644 --- a/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/client/Client.java +++ b/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/client/Client.java @@ -103,7 +103,10 @@ public final class Client { int status = httpClient.executeMethod(post); if (status == 201) { System.out.println(post.getResponseHeader("Location")); + } else if (status == 409) { + System.out.println("Document already exists: " + filename); } + } finally { post.releaseConnection(); } http://git-wip-us.apache.org/repos/asf/cxf/blob/4f70bdf4/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/server/Catalog.java ---------------------------------------------------------------------- diff --git a/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/server/Catalog.java b/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/server/Catalog.java index 3c3d75f..0102d08 100644 --- a/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/server/Catalog.java +++ b/distribution/src/main/release/samples/jax_rs/search/src/main/java/demo/jaxrs/search/server/Catalog.java @@ -63,10 +63,12 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -99,13 +101,18 @@ public class Catalog { final DataHandler handler = attachment.getDataHandler(); if (handler != null) { - final String source = handler.getName(); - + final String source = handler.getName(); + final LuceneDocumentMetadata metadata = new LuceneDocumentMetadata() .withSource(source) .withField("modified", Date.class); try { + if (exists(source)) { + response.resume(Response.status(Status.CONFLICT).build()); + return; + } + final byte[] content = IOUtils.readBytesFromStream(handler.getInputStream()); storeAndIndex(metadata, content); } catch (final IOException ex) { @@ -123,8 +130,6 @@ public class Catalog { response.resume(Response.status(Status.BAD_REQUEST).build()); } } - - }); } @@ -140,10 +145,13 @@ public class Catalog { for (final ScoreDoc scoreDoc: searcher.search(query, 1000).scoreDocs) { final DocumentStoredFieldVisitor visitor = - new DocumentStoredFieldVisitor("source"); + new DocumentStoredFieldVisitor(LuceneDocumentMetadata.SOURCE_FIELD); reader.document(scoreDoc.doc, visitor); - builder.add(visitor.getDocument().getField("source").stringValue()); + builder.add(visitor + .getDocument() + .getField(LuceneDocumentMetadata.SOURCE_FIELD) + .stringValue()); } return builder.build(); @@ -170,7 +178,7 @@ public class Catalog { builder.add( Json.createObjectBuilder() - .add("source", document.getField("source").stringValue()) + .add("source", document.getField(LuceneDocumentMetadata.SOURCE_FIELD).stringValue()) .add("score", scoreDoc.score) ); } @@ -214,6 +222,18 @@ public class Catalog { return visitor; } + private boolean exists(final String source) throws IOException { + final IndexReader reader = DirectoryReader.open(directory); + final IndexSearcher searcher = new IndexSearcher(reader); + + try { + return searcher.search(new TermQuery( + new Term(LuceneDocumentMetadata.SOURCE_FIELD, source)), 1).totalHits > 0; + } finally { + reader.close(); + } + } + private void storeAndIndex(final LuceneDocumentMetadata metadata, final byte[] content) throws IOException { http://git-wip-us.apache.org/repos/asf/cxf/blob/4f70bdf4/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java index f3e0b7e..8cdc2ee 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/LuceneDocumentMetadata.java @@ -26,6 +26,8 @@ import javax.ws.rs.ext.ParamConverterProvider; import org.apache.cxf.jaxrs.ext.search.DefaultParamConverterProvider; public class LuceneDocumentMetadata { + public static final String SOURCE_FIELD = "source"; + private final Map< String, Class< ? > > fieldTypes; private final String contentFieldName; private String source; @@ -63,7 +65,11 @@ public class LuceneDocumentMetadata { public String getContentFieldName() { return contentFieldName; } - + + public String getSourceFieldName() { + return SOURCE_FIELD; + } + public String getSource() { return source; } http://git-wip-us.apache.org/repos/asf/cxf/blob/4f70bdf4/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java ---------------------------------------------------------------------- diff --git a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java index dc086ac..1c25203 100644 --- a/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java +++ b/rt/rs/extensions/search/src/main/java/org/apache/cxf/jaxrs/ext/search/tika/TikaLuceneContentExtractor.java @@ -193,7 +193,8 @@ public class TikaLuceneContentExtractor { } if (!StringUtils.isBlank(documentMetadata.getSource())) { - document.add(new StringField("source", documentMetadata.getSource(), Store.YES)); + document.add(new StringField(documentMetadata.getSourceFieldName(), + documentMetadata.getSource(), Store.YES)); } return document;