Return-Path: X-Original-To: apmail-jena-commits-archive@www.apache.org Delivered-To: apmail-jena-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 00BA7F738 for ; Fri, 5 Apr 2013 10:52:02 +0000 (UTC) Received: (qmail 25392 invoked by uid 500); 5 Apr 2013 10:52:01 -0000 Delivered-To: apmail-jena-commits-archive@jena.apache.org Received: (qmail 25362 invoked by uid 500); 5 Apr 2013 10:52:01 -0000 Mailing-List: contact commits-help@jena.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@jena.apache.org Delivered-To: mailing list commits@jena.apache.org Received: (qmail 25340 invoked by uid 99); 5 Apr 2013 10:52:01 -0000 Received: from athena.apache.org (HELO athena.apache.org) (140.211.11.136) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 05 Apr 2013 10:52:01 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Fri, 05 Apr 2013 10:51:56 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id BF8BD2388847; Fri, 5 Apr 2013 10:51:36 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1464922 [1/3] - in /jena/Experimental/jena-text: ./ src/ src/main/ src/main/java/ src/main/java/examples/ src/main/java/jena/ src/main/java/org/ src/main/java/org/apache/ src/main/java/org/apache/jena/ src/main/java/org/apache/jena/dsg/ sr... 
Date: Fri, 05 Apr 2013 10:51:34 -0000 To: commits@jena.apache.org From: andy@apache.org X-Mailer: svnmailer-1.0.8-patched Message-Id: <20130405105136.BF8BD2388847@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: andy Date: Fri Apr 5 10:51:33 2013 New Revision: 1464922 URL: http://svn.apache.org/r1464922 Log: Initial import Added: jena/Experimental/jena-text/fuseki-text-config.ttl jena/Experimental/jena-text/log4j.properties jena/Experimental/jena-text/pom.xml jena/Experimental/jena-text/src/ jena/Experimental/jena-text/src/main/ jena/Experimental/jena-text/src/main/java/ jena/Experimental/jena-text/src/main/java/examples/ jena/Experimental/jena-text/src/main/java/examples/JenaTextSearchEx1.java jena/Experimental/jena-text/src/main/java/jena/ jena/Experimental/jena-text/src/main/java/jena/textindexer.java jena/Experimental/jena-text/src/main/java/org/ jena/Experimental/jena-text/src/main/java/org/apache/ jena/Experimental/jena-text/src/main/java/org/apache/jena/ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/BatchedStreamRDF.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChanges.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChangesBatched.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetGraphMonitor.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GLib.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GraphViewDataset.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/QuadAction.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamQuadsToTriples.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamRDFSplitter.java jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamTriplesToQuads.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/ 
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Entity.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Indexer.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/QueryPF.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDatasetFactory.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducer.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerEntities.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextDocProducerTriples.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndex.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexException.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexLucene.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextIndexSolr.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/TextQuery.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/EntityMapAssembler.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextAssembler.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextDatasetAssembler.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexLuceneAssembler.java jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextIndexSolrAssembler.java 
jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/assembler/TextVocab.java jena/Experimental/jena-text/src/test/ jena/Experimental/jena-text/src/test/java/ jena/Experimental/jena-text/src/test/java/jena/ jena/Experimental/jena-text/src/test/java/jena/TestTextIndexer.java jena/Experimental/jena-text/src/test/java/org/ jena/Experimental/jena-text/src/test/java/org/apache/ jena/Experimental/jena-text/src/test/java/org/apache/jena/ jena/Experimental/jena-text/src/test/java/org/apache/jena/query/ jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/ jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithLuceneTextIndex.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/AbstractTestDatasetWithTextIndex.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/EMBEDDED_SOLR.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/TS_Text.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithEmbeddedSolrTextIndex.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/TestDatasetWithLuceneTextIndex.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/TestSearchBeforeWriteOnDatasetWithLuceneTextIndex.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/TextSearchUtil.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/ jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/AbstractTestTextAssembler.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestEntityMapAssembler.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextDatasetAssembler.java jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexLuceneAssembler.java 
jena/Experimental/jena-text/src/test/java/org/apache/jena/query/text/assembler/TestTextIndexSolrAssembler.java jena/Experimental/jena-text/testing/ jena/Experimental/jena-text/testing/LARQ/ jena/Experimental/jena-text/text-query.mdtext Added: jena/Experimental/jena-text/fuseki-text-config.ttl URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/fuseki-text-config.ttl?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/fuseki-text-config.ttl (added) +++ jena/Experimental/jena-text/fuseki-text-config.ttl Fri Apr 5 10:51:33 2013 @@ -0,0 +1,77 @@ +## Example of a TDB dataset and text index published using Fuseki + +@prefix : <#> . +@prefix fuseki: . +@prefix rdf: . +@prefix rdfs: . +@prefix tdb: . +@prefix ja: . +@prefix text: . + +[] rdf:type fuseki:Server ; + # Timeout - server-wide default: milliseconds. + # Format 1: "1000" -- 1 second timeout + # Format 2: "10000,60000" -- 10s timeout to first result, then 60s timeout to for rest of query. + # See java doc for ARQ.queryTimeout + # ja:context [ ja:cxtName "arq:queryTimeout" ; ja:cxtValue "10000" ] ; + # ja:loadClass "your.code.Class" ; + + fuseki:services ( + <#service_text_tdb> + ) . + +# TDB +[] ja:loadClass "com.hp.hpl.jena.tdb.TDB" . +tdb:DatasetTDB rdfs:subClassOf ja:RDFDataset . +tdb:GraphTDB rdfs:subClassOf ja:Model . + +# Text +[] ja:loadClass "org.apache.jena.query.text.TextQuery" . +text:TextDataset rdfs:subClassOf ja:RDFDataset . +#text:TextIndexSolr rdfs:subClassOf text:TextIndex . +text:TextIndexLucene rdfs:subClassOf text:TextIndex . 
+ +## --------------------------------------------------------------- + +<#service_text_tdb> rdf:type fuseki:Service ; + rdfs:label "TDB/text service" ; + fuseki:name "ds" ; + fuseki:serviceQuery "query" ; + fuseki:serviceQuery "sparql" ; + fuseki:serviceUpdate "update" ; + fuseki:serviceUpload "upload" ; + fuseki:serviceReadGraphStore "get" ; + fuseki:serviceReadWriteGraphStore "data" ; + fuseki:dataset <#text_dataset> ; + . + +<#text_dataset> rdf:type text:TextDataset ; + text:dataset <#dataset> ; + ##text:index <#indexSolr> ; + text:index <#indexLucene> ; + . + +<#dataset> rdf:type tdb:DatasetTDB ; + tdb:location "DB" ; + tdb:unionDefaultGraph true ; + . + +<#indexSolr> a text:TextIndexSolr ; + #text:server ; + text:server ; + text:entityMap <#entMap> ; + . + +<#indexLucene> a text:TextIndexLucene ; + text:directory ; + ##text:directory "mem" ; + text:entityMap <#entMap> ; + . + +<#entMap> a text:EntityMap ; + text:entityField "uri" ; + text:defaultField "text" ; ## Must be defined in the text:maps + text:map ( + # rdfs:label + [ text:field "text" ; text:predicate rdfs:label ] + ) . 
Added: jena/Experimental/jena-text/log4j.properties URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/log4j.properties?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/log4j.properties (added) +++ jena/Experimental/jena-text/log4j.properties Fri Apr 5 10:51:33 2013 @@ -0,0 +1,21 @@ +log4j.rootLogger=INFO, stdlog + +log4j.appender.stdlog=org.apache.log4j.ConsoleAppender +## log4j.appender.stdlog.target=System.err +log4j.appender.stdlog.layout=org.apache.log4j.PatternLayout +log4j.appender.stdlog.layout.ConversionPattern=%d{HH:mm:ss} %-5p %-25c{1} :: %m%n + +# Execution logging +log4j.logger.com.hp.hpl.jena.arq.info=INFO +log4j.logger.com.hp.hpl.jena.arq.exec=INFO + +# Everything else in Jena +log4j.logger.com.hp.hpl.jena=INFO +log4j.logger.org.apache.jena=INFO +log4j.logger.org.apache.jena.query.text=INFO +log4j.logger.org.apache.jena.riot=INFO + +## Too noisy. +log4j.logger.org.apache.solr=WARN +log4j.logger.org.apache.solr.core.CoreContainer=FATAL +log4j.logger.org.apache.solr.core.JmxMonitoredMap=FATAL Added: jena/Experimental/jena-text/pom.xml URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/pom.xml?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/pom.xml (added) +++ jena/Experimental/jena-text/pom.xml Fri Apr 5 10:51:33 2013 @@ -0,0 +1,231 @@ + + + 4.0.0 + com.epimorphics.eldp + elps-solr + jar + ELDP-Solr + 0.0.0-SNAPSHOT + + + + apache.snapshots + Apache Snapshot Repository + http://repository.apache.org/snapshots + + false + + + + + + [4.9,) + 1.6.4 + 1.2.16 + 1.6 + ${jdk.version} + UTF-8 + yyyy-MM-dd'T'HH:mm:ssZ + ${maven.build.timestamp} + [1.3,) + + + + + + org.apache.jena + apache-jena-libs + 2.10.1-SNAPSHOT + pom + + + + org.apache.jena + jena-fuseki + 0.2.7-SNAPSHOT + + + + solr-solrj + org.apache.solr + 4.1.0 + jar + compile + + + + + + solr-core + 
org.apache.solr + 4.1.0 + jar + compile + + + org.slf4j + slf4j-jdk14 + + + + + + javax.servlet + servlet-api + 2.5 + + + + junit + junit + ${ver.junit} + test + + + org.hamcrest + hamcrest-core + + + + + + org.slf4j + slf4j-log4j12 + ${ver.slf4j} + + + + + log4j + log4j + ${ver.log4j} + + + + org.hamcrest + hamcrest-all + ${ver.hamcrest} + test + + + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 2.5.1 + + UTF-8 + true + source,lines,vars + true + ${jdk.version} + ${jdk.version} + true + + + + + org.apache.maven.plugins + maven-surefire-plugin + 2.12.4 + + + **/TS_*.java + + + + + + org.apache.maven.plugins + maven-jar-plugin + 2.4 + + + + org.apache.maven.plugins + maven-source-plugin + 2.2.1 + + + attach-sources + + jar-no-fork + + + + + + + + org.apache.maven.plugins + maven-javadoc-plugin + 2.9 + + + attach-javadocs + + jar + + + + + true + public + true + UTF-8 + ${project.name} ${project.version} + ${project.name} ${project.version} + Licenced under the Apache License, Version 2.0 + + + + + org.apache.maven.plugins + maven-resources-plugin + 2.6 + + UTF-8 + + + + + org.apache.maven.plugins + maven-dependency-plugin + 2.5.1 + + false + true + + + + + org.apache.maven.plugins + maven-eclipse-plugin + 2.9 + + + ${project.build.directory}/classes + true + false + + + + + + + + + Added: jena/Experimental/jena-text/src/main/java/examples/JenaTextSearchEx1.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/examples/JenaTextSearchEx1.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/examples/JenaTextSearchEx1.java (added) +++ jena/Experimental/jena-text/src/main/java/examples/JenaTextSearchEx1.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,161 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package examples; + +import org.apache.jena.atlas.lib.FileOps ; +import org.apache.jena.atlas.logging.Log ; +import org.apache.jena.dsg.DatasetGraphMonitor ; +import org.apache.jena.fuseki.FusekiCmd ; +import org.apache.jena.query.text.* ; +import org.apache.jena.riot.RDFDataMgr ; +import org.apache.lucene.store.Directory ; +import org.apache.lucene.store.RAMDirectory ; +import org.slf4j.Logger ; +import org.slf4j.LoggerFactory ; + +import com.hp.hpl.jena.query.* ; +import com.hp.hpl.jena.rdf.model.Model ; +import com.hp.hpl.jena.sparql.util.QueryExecUtils ; +import com.hp.hpl.jena.tdb.TDBFactory ; +import com.hp.hpl.jena.tdb.transaction.DatasetGraphTransaction ; +import com.hp.hpl.jena.vocabulary.RDFS ; + +// TODO Not yet an example + +public class JenaTextSearchEx1 +{ + + static { Log.setLog4j() ; } + + private static Logger log = LoggerFactory.getLogger(JenaTextSearchEx1.class) ; + + static boolean luceneProgrammatic = true ; + static boolean runClean = true ; + + public static void mainFuseki() + { + if ( true ) + { + FileOps.clearDirectory("solr/SolrARQ/data") ; + FileOps.clearDirectory("Lucene") ; + FileOps.clearDirectory("DB") ; + } + FusekiCmd.main("--config=fuseki-text-config.ttl") ; + System.exit(0) ; + } + + public static void main(String[] args) throws 
Exception + { +// mainFuseki() ; +// System.exit(0) ; + + if ( ! luceneProgrammatic ) + { + System.setProperty("solr.solr.home", "solr"); + //Log.disable("org.apache.solr") ; + //main2(args) ; System.exit(0) ; + + if ( runClean ) + { + FileOps.clearDirectory("solr/SolrARQ/data") ; + FileOps.clearDirectory("Lucene") ; + FileOps.clearDirectory("DB") ; + } + } + + Dataset ds = null ; + if ( luceneProgrammatic ) + { + TextQuery.init() ; + //Lucene setup - temp. + Dataset ds1 = TDBFactory.createDataset() ; + + EntityDefinition entDef = new EntityDefinition("uri", "text", RDFS.label.asNode()) ; + entDef.set("text", RDFS.label.asNode()) ; + + Directory dir = new RAMDirectory(); + ds = TextDatasetFactory.createLucene(ds1, dir, entDef) ; + } + else + //ds = TextDatasetFactory.create("text-config.ttl") ; + ds = TextDatasetFactory.create("fuseki-text-config.ttl") ; + + loadData(ds) ; + + TextIndex index = ((DatasetGraphText)(ds.asDatasetGraph())).getTextIndex() ; +// System.out.println("DBG") ; +// System.out.println(index.query("*:*")) ; +// System.out.println("DBG") ; + + queryData(ds) ; + System.exit(0) ; + } + + public static void queryData(Dataset dataset) + { + DatasetGraphMonitor dsgm = (DatasetGraphMonitor)dataset.asDatasetGraph() ; + DatasetGraphTransaction dsg = (DatasetGraphTransaction)dsgm.monitored() ; + + dsg.begin(ReadWrite.READ) ; + try { + log.info("START") ; + long startTime = System.nanoTime() ; + String pre = "PREFIX : PREFIX text: PREFIX rdfs: " ; + + //String qs = "SELECT * { ?s text:query 'word' }" ; + //String qs = "SELECT * { ?s text:query (rdfs:label 'X1') ; rdfs:label ?label }" ; + //String qs = "SELECT * { { SELECT DISTINCT ?s { ?s text:query (rdfs:label 'word') } } ?s rdfs:label ?label }" ; + //String qs = "ASK { text:query 'word' }" ; + //String qs = "SELECT * { ?s pf:query (rdfs:label 'word' 4) . ?s rdfs:label ?label }" ; + //String qs = "SELECT * { ?s text:query ('word' 2) . 
?s rdfs:label ?label }" ; + //String qs = "SELECT * { ?s ?p ?o }" ; + //Query q = QueryFactory.create(pre+"\n"+qs) ; + + Query q = QueryFactory.read("Q.rq") ; + + QueryExecution qexec = QueryExecutionFactory.create(q , dataset) ; + QueryExecUtils.executeQuery(q, qexec) ; + long finishTime = System.nanoTime() ; + double time = (finishTime-startTime)/1.0e6 ; + log.info(String.format("FINISH - %.2fms", time)) ; + } finally { dsg.end() ; } + } + + public static void loadData(Dataset dataset) + { + if ( ! runClean ) + return ; + + log.info("Start loading") ; + long startTime = System.nanoTime() ; + dataset.begin(ReadWrite.WRITE) ; + try { + Model m = dataset.getDefaultModel() ; + RDFDataMgr.read(m, "D.ttl") ; + //RDFDataMgr.read(dataset, "D.ttl") ; + dataset.commit() ; + } finally { dataset.end() ; } + + long finishTime = System.nanoTime() ; + double time = (finishTime-startTime)/1.0e6 ; + log.info(String.format("Finish loading - %.2fms", time)) ; + } + +} + Added: jena/Experimental/jena-text/src/main/java/jena/textindexer.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/jena/textindexer.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/jena/textindexer.java (added) +++ jena/Experimental/jena-text/src/main/java/jena/textindexer.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package jena; + +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import org.apache.jena.query.text.Entity; +import org.apache.jena.query.text.EntityDefinition; +import org.apache.jena.query.text.TextIndex; +import org.apache.jena.query.text.TextQuery; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import arq.cmd.CmdException; +import arq.cmdline.CmdARQ; +import arq.cmdline.ModDataset; +import arq.cmdline.ModDatasetAssembler; + +import com.hp.hpl.jena.graph.Node; +import com.hp.hpl.jena.query.Dataset; +import com.hp.hpl.jena.sparql.core.DatasetGraph; +import com.hp.hpl.jena.sparql.core.Quad; +import com.hp.hpl.jena.sparql.util.FmtUtils; + + +/** + * Text indexer application that will read a dataset and index its triples in its text index. + */ +public class textindexer extends CmdARQ { + + private static Logger log = LoggerFactory.getLogger(textindexer.class) ; + + protected ModDataset modDataset = new ModDatasetAssembler() ; + protected Dataset dataset = null; + protected TextIndex textIndex = null; + protected EntityDefinition entityDefinition; + protected ProgressMonitor progressMonitor; + + static public void main(String... argv) + { + new textindexer(argv).mainRun() ; + } + + static public void testMain(String... 
argv) { + new textindexer(argv).mainMethod(); + } + + // @@ TODO + // check integrated properly with command line processing utilities + protected textindexer(String[] argv) + { + super(argv) ; + super.addModule(modDataset); + progressMonitor = new ProgressMonitor("properties indexed"); + } + + @Override + protected void processModulesAndArgs() + { + super.processModulesAndArgs() ; + dataset = modDataset.createDataset(); + if (dataset == null) + throw new CmdException("No dataset specified") ; + textIndex = (TextIndex) dataset.getContext().get(TextQuery.textIndex); + if (textIndex == null) { + throw new CmdException("Dataset has no text index"); + } + entityDefinition = textIndex.getDocDef(); + } + + @Override + protected String getSummary() { + return getCommandName()+" [--desc | --dataset] assemblerPath" ; + } + + @Override + protected void exec() { + Set properties = getIndexedProperties(); + DatasetGraph dsg = dataset.asDatasetGraph(); + textIndex.startIndexing(); + + // there are various strategies possible here + // what is implemented is a first cut simple approach + // currently - for each indexed property + // list and index triples with that property + // that way only process triples that will be indexed + // but each entity may be updated several times + + for (Iterator propIter = properties.iterator(); propIter.hasNext() ; ) { + Iterator quadIter = dsg.find(Node.ANY, Node.ANY, propIter.next(), Node.ANY) ; + for ( ; quadIter.hasNext(); ) { + Quad quad = quadIter.next(); + Entity entity = createEntity(quad) ; + if (entity != null) { + textIndex.addEntity(entity); + progressMonitor.progressByOne(); + } + } + } + textIndex.finishIndexing(); + progressMonitor.close(); + } + + private Set getIndexedProperties() { + Set result = new HashSet(); + for ( Iterator iter = entityDefinition.fields().iterator(); iter.hasNext(); ) { + result.add(entityDefinition.getPredicate(iter.next())); + } + return result; + } + + private Entity createEntity(Quad quad) { + Node s 
= quad.getSubject(); + String x = (s.isURI() ) ? s.getURI() : s.getBlankNodeLabel() ; + Entity result = new Entity(x); + Node p = quad.getPredicate() ; + String field = entityDefinition.getField(p) ; + if ( field == null ) + return null ; + Node o = quad.getObject() ; + String val = null ; + if ( o.isURI() ) + val = o.getURI() ; + else if ( o.isLiteral() ) + val = o.getLiteralLexicalForm() ; + else + { + log.warn("Not a literal value for mapped field-predicate: "+field+" :: "+FmtUtils.stringForString(field)) ; + return null; + } + result.put(field, val) ; + return result; + } + + // TDBLoader has a similar progress monitor + // Not used here to avoid making ARQ dependent on TDB + // So potential to rationalise and put progress monitor in a common + // utility class @@ TODO + private static class ProgressMonitor { + String progressMessage; + long startTime; + long progressCount; + long intervalStartTime; + long progressAtStartOfInterval; + long reportingInterval = 10000; // milliseconds + + ProgressMonitor(String progressMessage) { + this.progressMessage = progressMessage ; + start(); // in case start not called + } + + void start() { + startTime = System.currentTimeMillis(); + progressCount = 0L; + startInterval(); + } + + private void startInterval() { + intervalStartTime = System.currentTimeMillis(); + progressAtStartOfInterval = progressCount; + } + + void progressByOne() { + progressCount++; + long now = System.currentTimeMillis(); + if (reportDue(now)) { + report(now); + startInterval(); + } + } + + boolean reportDue(long now) { + return now - intervalStartTime >= reportingInterval; + } + + private void report(long now) { + long progressThisInterval = progressCount - progressAtStartOfInterval; + long intervalDuration = now - intervalStartTime; + long overallDuration = now - startTime; + String message = + progressCount + + " (" + progressThisInterval / (intervalDuration/1000) + " per second)" + + progressMessage + + " (" + progressCount / 
Math.max(overallDuration /1000, 1) + " per second overall)"; + log.info(message); + } + + void close() { + long overallDuration = System.currentTimeMillis() - startTime; + String message = + progressCount + + " (" + progressCount / Math.max(overallDuration / 1000, 1) + " per second)" + + progressMessage; + log.info(message); + } + } +} Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/BatchedStreamRDF.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/BatchedStreamRDF.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/BatchedStreamRDF.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/BatchedStreamRDF.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +import java.util.ArrayList ; +import java.util.List ; + +import org.apache.jena.atlas.lib.Lib ; +import org.apache.jena.riot.system.StreamRDFBase ; + +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.sparql.core.Quad ; + +/** Batch a stream into truiples and/or quads. + * Triples are batched on subject + * Quads are batched on (graph, subject). + * + */ +public abstract class BatchedStreamRDF extends StreamRDFBase +{ + private Node currentSubject = null ; + private Node currentGraph = null ; + private List batchTriples = null ; + private List batchQuads = null ; + + @Override + public final void start() + { + currentSubject = null ; + currentGraph = null ; + batchTriples = null ; + batchQuads = null ; + startBatching() ; + } + + // ---- Triples + @Override + public void triple(Triple triple) + { + Node s = triple.getSubject() ; +// Node p = triple.getPredicate() ; +// Node o = triple.getObject() ; + + if ( ! Lib.equal(s, currentSubject) ) + { + if ( currentSubject != null ) + finishBatchTriple(currentSubject) ; + startBatchTriple(s) ; + + currentGraph = null ; + currentSubject = s ; + } + + processTriple(triple) ; + } + + private void startBatchTriple(Node subject) + { + batchTriples = new ArrayList() ; + } + + private void finishBatchTriple(Node subject) + { + if ( batchTriples != null && batchTriples.size() > 0 ) + dispatchTriples(currentSubject, batchTriples) ; + } + + private void processTriple(Triple triple) + { + batchTriples.add(triple) ; + } + + // ---- Quads + @Override + public void quad(Quad quad) + { + if ( false ) + { + // Merge to a triple stream. + triple(quad.asTriple()) ; + return ; + } + + Node g = quad.getGraph() ; + Node s = quad.getSubject() ; + +// Node p = triple.getPredicate() ; +// Node o = triple.getObject() ; + + if ( ! Lib.equal(g, currentGraph) || ! 
Lib.equal(s, currentSubject) ) + { + if ( currentSubject != null ) + finishBatchQuad(currentGraph, currentSubject) ; + startBatchQuad(g, s) ; + currentGraph = g ; + currentSubject = s ; + } + processQuad(quad) ; + } + + private void startBatchQuad(Node graph, Node subject) + { + batchQuads = new ArrayList() ; + } + + private void finishBatchQuad(Node graph, Node subject) + { + if ( batchQuads != null && batchQuads.size() > 0 ) + dispatchQuads(currentGraph, currentSubject, batchQuads) ; + } + + private void processQuad(Quad Quad) + { + batchQuads.add(Quad) ; + } + + private void flush() + { + finishBatchTriple(currentSubject) ; + finishBatchQuad(currentGraph, currentSubject) ; + } + + @Override + public final void finish() + { + flush() ; + finishBatching() ; + } + + public abstract void dispatchTriples(Node s, List batch) ; + + public abstract void dispatchQuads(Node g, Node s, List batch) ; + + public abstract void startBatching() ; + + public abstract void finishBatching() ; + + +// public void dispatchTriples(List batch) +// { +// System.out.println("Batch/T : "+batch.size()) ; +// } +// +// public void dispatchQuads(List batch) +// { +// System.out.println("Batch/Q : "+batch.size()) ; +// } +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChanges.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChanges.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChanges.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChanges.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.dsg; + +import com.hp.hpl.jena.graph.Node ; + +/** Interface for notification of changes + * (adds and deletes of quads) to a DatasetGraph + */ +public interface DatasetChanges //extends Transactional +{ + /** Indicator that a sequence of changes is about to start + * Check each implementation of DatasetChanges as to whether this is used and + * what it indicates. + */ + public void start() ; + + /** A change has occurred + * @see QuadAction + */ + public void change(QuadAction qaction, Node g, Node s, Node p, Node o) ; + + /** Indicator that a sequence of changes has now finished */ + public void finish() ; +} \ No newline at end of file Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChangesBatched.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChangesBatched.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChangesBatched.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetChangesBatched.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.dsg; + +import java.util.ArrayList ; +import java.util.List ; + +import org.apache.jena.atlas.lib.Lib ; + +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.sparql.core.Quad ; + +public abstract class DatasetChangesBatched implements DatasetChanges +{ + private QuadAction currentAction = null ; + private Node currentSubject = null ; + private Node currentGraph = null ; + private List batchQuads = null ; + private boolean mergeBlankNodes = false ; + + protected DatasetChangesBatched() + { + this(false) ; + } + + /* Merge bNodes in a batch - i.e. include them in the current batch, not as new entities */ + protected DatasetChangesBatched(boolean mergeBNodes) + { + this.mergeBlankNodes = mergeBNodes ; + } + + @Override public final void start() + { + startBatch() ; + startBatched() ; + } + + @Override public final void finish() + { + finishBatch() ; + finishBatched() ; + } + + @Override + public void change(QuadAction qaction, Node g, Node s, Node p, Node o) + { + if ( mergeBlankNodes && s.isBlank() ) + { + if ( batchQuads == null ) + // No active batch. + startBatch() ; + } + else if ( ! Lib.equal(currentAction, qaction) || + ! Lib.equal(currentGraph, g) || + ! 
Lib.equal(currentSubject, s) ) + { + finishBatch() ; + startBatch() ; + currentAction = qaction ; + currentGraph = g ; + currentSubject = s ; + } + + batchQuads.add(new Quad(g,s,p,o)) ; + } + + private void startBatch() + { + if ( batchQuads == null ) + batchQuads = new ArrayList() ; + } + + protected void finishBatch() + { + if ( batchQuads == null || batchQuads.size() == 0 ) + return ; + dispatch(batchQuads) ; + batchQuads = null ; + } + + protected abstract void dispatch(List batch) ; + + protected abstract void startBatched() ; + + protected abstract void finishBatched() ; + +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetGraphMonitor.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetGraphMonitor.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetGraphMonitor.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/DatasetGraphMonitor.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,213 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg ; + +import java.util.ArrayList ; +import java.util.Iterator ; +import java.util.List ; +import java.util.NoSuchElementException ; + +import com.hp.hpl.jena.graph.Graph ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.sparql.core.DatasetGraph ; +import com.hp.hpl.jena.sparql.core.DatasetGraphWrapper ; +import com.hp.hpl.jena.sparql.core.Quad ; +import com.hp.hpl.jena.util.iterator.ExtendedIterator ; + +public class DatasetGraphMonitor extends DatasetGraphWrapper +{ + /** Whether to see if a quad action will change the dataset - test before add for existence, test before elete for absence */ + private boolean CheckFirst = true ; + /** Whether to record a no-op (maybe as a comment) */ + private boolean RecordNoAction = true ; + /** Whgere to send the notifications */ + private final DatasetChanges monitor ; + + public DatasetGraphMonitor(DatasetGraph dsg, DatasetChanges monitor) + { + super(dsg) ; + this.monitor = monitor ; + } + + public DatasetChanges getMonitor() { return monitor ; } + public DatasetGraph monitored() { return getWrapped() ; } + + @Override public void add(Quad quad) + { + if ( CheckFirst && contains(quad) ) + { + if ( RecordNoAction ) + record(QuadAction.NO_ADD, quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject()) ; + return ; + } + add$(quad) ; + } + + @Override public void add(Node g, Node s, Node p, Node o) + { + if ( CheckFirst && contains(g,s,p,o) ) + { + if ( RecordNoAction ) + record(QuadAction.NO_ADD,g,s,p,o) ; + return ; + } + + add$(g,s,p,o) ; + } + + private void add$(Node g, Node s, Node p, Node o) + { + super.add(g,s,p,o) ; + record(QuadAction.ADD,g,s,p,o) ; + } + + private void add$(Quad quad) + { + super.add(quad) ; + record(QuadAction.ADD, quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject()) ; + } + + @Override public void delete(Quad quad) + { + if ( CheckFirst && ! 
contains(quad) ) + { + if ( RecordNoAction ) + record(QuadAction.NO_DELETE, quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject()) ; + return ; + } + delete$(quad) ; + } + + @Override public void delete(Node g, Node s, Node p, Node o) + { + if ( CheckFirst && ! contains(g,s,p,o) ) + { + if ( RecordNoAction ) + record(QuadAction.NO_DELETE, g,s,p,o) ; + return ; + } + delete$(g,s,p,o) ; + } + + private void delete$(Quad quad) + { + super.delete(quad) ; + record(QuadAction.DELETE, quad.getGraph(), quad.getSubject(), quad.getPredicate(), quad.getObject()) ; + } + + private void delete$(Node g, Node s, Node p, Node o) + { + super.delete(g,s,p,o) ; + record(QuadAction.DELETE,g,s,p,o) ; + } + + + private static int SLICE = 1000 ; + + @Override + public void deleteAny(Node g, Node s, Node p, Node o) + { + // Need to find otu what is actually deleted. + // Could record explicit ..... super.deleteAny(g,s,p,o) ; + // or record actually diff ... + + while (true) + { + Iterator iter = find(g, s, p, o) ; + + // Read some + List some = take(iter, SLICE) ; + for (Quad q : some) + delete$(q) ; + if (some.size() < SLICE) break ; + } + } + + static List take(Iterator iter, int N) + { + iter = new IteratorN(iter, N) ; + List x = new ArrayList(N) ; + for ( ; iter.hasNext() ; ) + x.add(iter.next()) ; + return x ; + } + + static class IteratorN implements Iterator + { + private final Iterator iter ; + private final int N ; + private int count ; + + IteratorN(Iterator iter, int N) { + this.iter = iter ; + this.N = N ; + this.count = 0 ; + } + + @Override + public boolean hasNext() + { + if ( count >= N ) + return false ; + return iter.hasNext() ; + } + + @Override + public T next() + { + if ( count >= N ) + throw new NoSuchElementException() ; + T x = iter.next() ; + count++ ; + return null ; + } + + @Override + public void remove() + { + // But leave the count as-is. 
+ iter.remove() ; + } + } + + @Override public void addGraph(Node gn, Graph g) + { + // Convert to quads. + //super.addGraph(gn, g) ; + ExtendedIterator iter = g.find(Node.ANY, Node.ANY, Node.ANY) ; + for ( ; iter.hasNext(); ) + { + Triple t = iter.next() ; + add(gn, t.getSubject(), t.getPredicate(), t.getObject()) ; + } + } + + @Override public void removeGraph(Node gn) + { + //super.removeGraph(gn) ; + deleteAny(gn, Node.ANY, Node.ANY, Node.ANY) ; + } + + private void record(QuadAction action, Node g, Node s, Node p, Node o) + { + monitor.change(action, g, s, p, o) ; + } +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GLib.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GLib.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GLib.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GLib.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,134 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +import java.util.Iterator ; + +import org.apache.jena.atlas.iterator.Iter ; +import org.apache.jena.atlas.iterator.Transform ; + +import com.hp.hpl.jena.graph.Graph ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.sparql.core.Quad ; +import com.hp.hpl.jena.util.iterator.ExtendedIterator ; + +/** A collection of Graph/Triple/Node related functions */ +public class GLib +{ + // Merge with org.apache.jena.riot.other.Glib somehow. + // Need an "RDF commons" area. + + /** Convert null to Node.ANY */ + public static Node nullAsAny(Node x) { return nullAsDft(x, Node.ANY) ; } + + /** Convert null to some default Node */ + public static Node nullAsDft(Node x, Node dft) { return x==null ? dft : x ; } + + // DISTINCT means these are space using. + /** List the subjects in a graph (no duplicates) */ + public static Iterator listSubjects(Graph graph) + { + ExtendedIterator iter = graph.find(Node.ANY, Node.ANY, Node.ANY) ; + return Iter.iter(iter).map(projectTripeSubject).distinct() ; + } + + /** List the predicates in a graph (no duplicates) */ + public static Iterator listPredicates(Graph graph) + { + ExtendedIterator iter = graph.find(Node.ANY, Node.ANY, Node.ANY) ; + return Iter.iter(iter).map(projectTripePredicate).distinct() ; + } + + /** List the objects in a graph (no duplicates) */ + public static Iterator listObjects(Graph graph) + { + ExtendedIterator iter = graph.find(Node.ANY, Node.ANY, Node.ANY) ; + return Iter.iter(iter).map(projectTripeObject).distinct() ; + } + + private static Transform transformQuad2Triple = new Transform () { + @Override + public Triple convert(Quad quad) { return quad.asTriple() ; } + } ; + + /** Project quads to triples */ + public static Iter quads2triples(Iterator iter) + { + return Iter.iter(iter).map(transformQuad2Triple) ; + } + + /** Project quad to graphname */ + public static Iterator quad2graphName(Iterator iter) + { return Iter.map(iter, 
projectQuadGraphName) ; } + + /** Project quad to graphname */ + public static Iterator quad2subject(Iterator iter) + { return Iter.map(iter, projectQuadSubject) ; } + + /** Project quad to predicate */ + public static Iterator quad2predicate(Iterator iter) + { return Iter.map(iter, projectQuadPredicate) ; } + + /** Project quad to object */ + public static Iterator quad2object(Iterator iter) + { return Iter.map(iter, projectQuadObject) ; } + + /** Project triple to subject */ + public static Iterator triple2subject(Iterator iter) + { return Iter.map(iter, projectTripeSubject) ; } + + /** Project triple to predicate */ + public static Iterator triple2predicate(Iterator iter) + { return Iter.map(iter, projectTripePredicate) ; } + + /** Project triple to object */ + public static Iterator triple2object(Iterator iter) + { return Iter.map(iter, projectTripeObject) ; } + + /** Transform quad to graphname */ + public static Transform projectQuadGraphName = new Transform() { + @Override public Node convert(Quad quad) { return quad.getGraph() ; } + } ; + /** Transform quad to subject */ + public static Transform projectQuadSubject = new Transform() { + @Override public Node convert(Quad quad) { return quad.getSubject() ; } + } ; + /** Transform quad to predicate */ + public static Transform projectQuadPredicate = new Transform() { + @Override public Node convert(Quad quad) { return quad.getPredicate() ; } + } ; + /** Transform quad to object */ + public static Transform projectQuadObject = new Transform() { + @Override public Node convert(Quad quad) { return quad.getObject() ; } + } ; + /** Transform triple to subject */ + public static Transform projectTripeSubject = new Transform() { + @Override public Node convert(Triple triple) { return triple.getSubject() ; } + } ; + /** Transform triple to predicate */ + public static Transform projectTripePredicate = new Transform() { + @Override public Node convert(Triple triple) { return triple.getPredicate() ; } + } ; + /** 
Transform triple to object */ + public static Transform projectTripeObject = new Transform() { + @Override public Node convert(Triple triple) { return triple.getObject() ; } + } ; +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GraphViewDataset.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GraphViewDataset.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GraphViewDataset.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/GraphViewDataset.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,133 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +import java.util.Iterator ; + +import org.apache.jena.atlas.iterator.Iter ; +import org.apache.jena.query.text.DatasetGraphText ; + +import com.hp.hpl.jena.graph.Graph ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.graph.TripleMatch ; +import com.hp.hpl.jena.graph.impl.GraphBase ; +import com.hp.hpl.jena.shared.JenaException ; +import com.hp.hpl.jena.shared.PrefixMapping ; +import com.hp.hpl.jena.sparql.core.Quad ; +import com.hp.hpl.jena.util.iterator.ExtendedIterator ; +import com.hp.hpl.jena.util.iterator.WrappedIterator ; + +/** Graph over a DatasetGraph - maps graph operations to quad operations. */ + +public class GraphViewDataset extends GraphBase +{ + static class GraphViewException extends JenaException + { + public GraphViewException() { super(); } + public GraphViewException(String message) { super(message); } + public GraphViewException(Throwable cause) { super(cause) ; } + public GraphViewException(String message, Throwable cause) { super(message, cause) ; } + } + + private final DatasetGraphText dsg ; + private final Node gn ; // null for default graph. 
+ + protected GraphViewDataset(DatasetGraphText dsg, Node gn) + { + this.dsg = dsg ; + if ( gn == null ) + gn = Quad.defaultGraphNodeGenerated ; + this.gn = gn ; + } + + public static Graph createDefaultGraph(DatasetGraphText dsg) + { return new GraphViewDataset(dsg, Quad.defaultGraphNodeGenerated) ; } + + public static Graph createNamedGraph(DatasetGraphText dsg, Node graphIRI) + { return new GraphViewDataset(dsg, graphIRI) ; } + + private final boolean isDefaultGraph() { return Quad.isDefaultGraph(gn) ; } + + @Override + protected PrefixMapping createPrefixMapping() + { + + return baseGraph().getPrefixMapping() ; + } + + private Graph baseGraph() + { + if ( isDefaultGraph() ) + return dsg.getBase().getDefaultGraph() ; + else + return dsg.getBase().getGraph(gn) ; + } + + @Override + protected ExtendedIterator graphBaseFind(TripleMatch m) + { + if ( m == null ) m = Triple.ANY ; + Node s = m.getMatchSubject() ; + Node p = m.getMatchPredicate() ; + Node o = m.getMatchObject() ; + return graphBaseFind(s, p, o) ; + } + + @Override + protected ExtendedIterator graphBaseFind(Node s, Node p, Node o) + { + Iterator iter = GLib.quads2triples(dsg.find(gn, s, p, o)) ; + if ( Quad.isUnionGraph(gn) ) + return graphUnionFind(s, p, o) ; + return WrappedIterator.createNoRemove(iter) ; + } + + protected ExtendedIterator graphUnionFind(Node s, Node p, Node o) + { + // Implementation may wish to do better so this is separated out. + Iterator iter = GLib.quads2triples(dsg.find(gn, s, p, o)) ; + // Suppress duplicates after projecting to triples. 
+ iter = Iter.distinct(iter) ; + return WrappedIterator.createNoRemove(iter) ; + } + + @Override + public void performAdd( Triple t ) + { + if ( Quad.isUnionGraph(gn) ) + throw new GraphViewException("Can't update the default union graph of a dataset") ; + Node s = t.getSubject() ; + Node p = t.getPredicate() ; + Node o = t.getObject() ; + dsg.add(gn, s, p, o) ; + } + + @Override + public void performDelete( Triple t ) + { + if ( Quad.isUnionGraph(gn) ) + throw new GraphViewException("Can't update the default union graph of a dataset") ; + Node s = t.getSubject() ; + Node p = t.getPredicate() ; + Node o = t.getObject() ; + dsg.delete(gn, s, p, o) ; + } +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/QuadAction.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/QuadAction.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/QuadAction.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/QuadAction.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,26 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +public enum QuadAction { + ADD("A"), DELETE("D"), NO_ADD("#A"), NO_DELETE("#D") ; + public final String label ; + QuadAction(String label) { this.label = label ; } + +} Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamQuadsToTriples.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamQuadsToTriples.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamQuadsToTriples.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamQuadsToTriples.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +import com.hp.hpl.jena.sparql.core.Quad ; + +import org.apache.jena.riot.system.StreamRDF ; +import org.apache.jena.riot.system.StreamRDFWrapper ; + +/** Convert quads to triples by ignoring the graph node in the quad */ +public class StreamQuadsToTriples extends StreamRDFWrapper +{ + public StreamQuadsToTriples(StreamRDF other) { super(other) ; } + + @Override + public void quad(Quad quad) + { triple(quad.asTriple()) ; } +} Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamRDFSplitter.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamRDFSplitter.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamRDFSplitter.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamRDFSplitter.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.jena.dsg; + +import org.apache.jena.atlas.lib.Tuple ; +import org.apache.jena.riot.system.StreamRDF ; + +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.sparql.core.Quad ; + +class StreamRDFSplitter implements StreamRDF +{ + protected final StreamRDF stream1 ; + protected final StreamRDF stream2 ; + + public StreamRDFSplitter(StreamRDF stream1, StreamRDF stream2) { + this.stream1 = stream1 ; + this.stream2 = stream2 ; + } + + @Override + public void start() + { stream1.start() ; stream2.start() ; } + + @Override + public void triple(Triple triple) + { stream1.triple(triple) ; stream2.triple(triple) ; } + + @Override + public void quad(Quad quad) + { stream1.quad(quad) ; stream2.quad(quad) ; } + + @Override + public void tuple(Tuple tuple) + { stream1.tuple(tuple) ; stream2.tuple(tuple) ; } + + @Override + public void base(String base) + { stream1.base(base) ; stream2.base(base) ; } + + @Override + public void prefix(String prefix, String iri) + { stream1.prefix(prefix, iri) ; stream2.prefix(prefix, iri) ; } + + @Override + public void finish() + { stream1.finish() ; stream2.finish() ; } + +} Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamTriplesToQuads.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamTriplesToQuads.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamTriplesToQuads.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/dsg/StreamTriplesToQuads.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.dsg; + +import org.apache.jena.riot.system.StreamRDF ; +import org.apache.jena.riot.system.StreamRDFWrapper ; + +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.graph.Triple ; +import com.hp.hpl.jena.sparql.core.Quad ; + +/** Convert to quads by adding in a graph node to the quad */ +public class StreamTriplesToQuads extends StreamRDFWrapper +{ + private Node graphName ; + + public StreamTriplesToQuads(StreamRDF other, Node graphName) + { + super(other) ; + this.graphName = graphName ; + } + + @Override + public void triple(Triple triple) + { + Quad q = new Quad(graphName, triple) ; + quad(q) ; + } +} Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/DatasetGraphText.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,157 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.jena.query.text; + +import java.util.Iterator ; +import java.util.List ; + +import org.apache.jena.dsg.DatasetGraphMonitor ; +import org.apache.jena.dsg.GraphViewDataset ; +import org.apache.lucene.queryparser.classic.QueryParser ; +import org.slf4j.Logger ; +import org.slf4j.LoggerFactory ; + +import com.hp.hpl.jena.graph.Graph ; +import com.hp.hpl.jena.graph.Node ; +import com.hp.hpl.jena.query.ReadWrite ; +import com.hp.hpl.jena.sparql.core.DatasetGraph ; +import com.hp.hpl.jena.sparql.core.DatasetGraphWithLock ; +import com.hp.hpl.jena.sparql.core.Transactional ; + +public class DatasetGraphText extends DatasetGraphMonitor implements Transactional +{ + private static Logger log = LoggerFactory.getLogger(DatasetGraphText.class) ; + private final TextIndex textIndex ; + private final Transactional dsgtxn ; + + public DatasetGraphText(DatasetGraph dsg, TextIndex index, TextDocProducer producer) + { + super(dsg, producer) ; + this.textIndex = index ; + if ( dsg instanceof Transactional ) + dsgtxn = (Transactional)dsg ; + else + dsgtxn = new DatasetGraphWithLock(dsg) ; + } + + + public DatasetGraph getBase() { return getWrapped() ; } + + // ---- Intecept these and force the use of views. 
+ @Override + public Graph getDefaultGraph() + { return GraphViewDataset.createDefaultGraph(this) ; } + + @Override + public Graph getGraph(Node graphNode) + { return GraphViewDataset.createNamedGraph(this, graphNode) ; } + // ---- + + public TextIndex getTextIndex() + { + return textIndex; + } + + public Iterator search(String queryString) + { + return search(queryString, null) ; + } + + public Iterator search(String queryString, Node predicate) + { + return search(queryString, predicate, -1) ; + } + + public Iterator search(String queryString, Node predicate, int limit) + { + queryString = QueryParser.escape(queryString) ; + if ( predicate == null ) + predicate = textIndex.getDocDef().getPrimaryPredicate() ; + if ( predicate != null ) + { + String f = textIndex.getDocDef().getField(predicate) ; + queryString = f+":"+queryString ; + } + List results = textIndex.query(queryString, limit) ; + return results.iterator() ; + } + + // Imperfect. + private boolean needFinish = false ; + + @Override + public void begin(ReadWrite readWrite) + { + dsgtxn.begin(readWrite) ; + //textIndex.begin(readWrite) ; + if ( readWrite == ReadWrite.WRITE ) + { + // WRONG design + super.getMonitor().start() ; + // Right design. 
+ //textIndex.startIndexing() ; + needFinish = true ; + } + } + + @Override + public void commit() + { + try { + if ( needFinish ) + { + super.getMonitor().finish() ; + //textIndex.finishIndexing() ; + } + needFinish = false ; + //textIndex.commit() ; + dsgtxn.commit() ; + } catch (Throwable ex) { + log.warn("Exception in commit: "+ex.getMessage(), ex) ; + dsgtxn.abort() ; + } + } + + @Override + public void abort() + { + try { + if ( needFinish ) + textIndex.abortIndexing() ; + //textIndex.abort() ; + dsgtxn.abort() ; + } catch (Throwable ex) { log.warn("Exception in abort: "+ex.getMessage(), ex) ; } + } + + @Override + public boolean isInTransaction() + { + return dsgtxn.isInTransaction() ; + } + + @Override + public void end() + { + try { + //textIndex.end() ; + dsgtxn.end() ; + } catch (Throwable ex) { log.warn("Exception in end: "+ex.getMessage(), ex) ; } + } +} + Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Entity.java URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Entity.java?rev=1464922&view=auto ============================================================================== --- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Entity.java (added) +++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Entity.java Fri Apr 5 10:51:33 2013 @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.util.HashMap ;
+import java.util.Map ;
+
+public class Entity
+{
+    private final String id ;
+    private final Map<String, Object> map = new HashMap<String, Object>() ;
+
+    public Entity(String entityId) { this.id = entityId ; }
+
+    public String getId() { return id ; }
+
+    public void put(String key, Object value)
+    { map.put(key, value) ; }
+
+    public Object get(String key)
+    { return map.get(key) ; }
+
+    public Map<String, Object> getMap() { return map ; }
+
+    @Override
+    public String toString() {
+        return id+" : "+map ;
+    }
+}
+

Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java?rev=1464922&view=auto
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java (added)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/EntityDefinition.java Fri Apr 5 10:51:33 2013
@@ -0,0 +1,71 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+import java.util.Collection ;
+import java.util.Collections ;
+import java.util.HashMap ;
+import java.util.Map ;
+
+import com.hp.hpl.jena.graph.Node ;
+
+/** Definition of a "document"
+ */
+public class EntityDefinition
+{
+    private final Map<Node, String> predicateToField = new HashMap<Node, String>() ;
+    private final Map<String, Node> fieldToPredicate = new HashMap<String, Node>() ;
+    private final Collection<String> fields = Collections.unmodifiableCollection(fieldToPredicate.keySet()) ;
+    private final String entityField ;
+    private final String primaryField ;
+
+    /**
+     * @param entityField The entity being indexed (e.g. its URI).
+     * @param primaryField The primary/default field to search
+     * @param primaryProperty The property associated with the primary/default field
+     */
+    public EntityDefinition(String entityField, String primaryField, Node primaryProperty)
+    {
+        this.entityField = entityField ;
+        this.primaryField = primaryField ;
+        set(primaryField, primaryProperty) ;
+    }
+
+    public String getEntityField() { return entityField ; }
+
+    public void set(String field, Node predicate) {
+        predicateToField.put(predicate, field) ;
+        fieldToPredicate.put(field, predicate) ;
+    }
+
+    public Node getPredicate(String field) {
+        return fieldToPredicate.get(field) ;
+    }
+
+    public String getField(Node predicate) {
+        return predicateToField.get(predicate) ;
+    }
+
+    public Node getPrimaryPredicate() { return fieldToPredicate.get(primaryField) ; }
+
+    public String getPrimaryField() { return primaryField ; }
+
+    public Collection<String> fields() { return fields ; }
+}
+

Added: jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Indexer.java
URL: http://svn.apache.org/viewvc/jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Indexer.java?rev=1464922&view=auto
==============================================================================
--- jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Indexer.java (added)
+++ jena/Experimental/jena-text/src/main/java/org/apache/jena/query/text/Indexer.java Fri Apr 5 10:51:33 2013
@@ -0,0 +1,27 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.
You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jena.query.text;
+
+public interface Indexer
+{
+    void start() ;
+    void handle(Entity entity) ;
+    void finish() ;
+}
+