incubator-triplesoup-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From leosim...@apache.org
Subject svn commit: r528394 [14/35] - in /incubator/triplesoup/donations/TRIPLES-3-RDFStore: ./ dbms/ dbms/client/ dbms/client/t/ dbms/dbmsproxy/ dbms/deamon/ dbms/doc/ dbms/include/ dbms/libdbms/ dbms/utils/ doc/ include/ lib/ lib/DBD/ lib/RDFStore/ lib/RDFSt...
Date Fri, 13 Apr 2007 08:56:16 GMT
Added: incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Object.pm
URL: http://svn.apache.org/viewvc/incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Object.pm?view=auto&rev=528394
==============================================================================
--- incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Object.pm (added)
+++ incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Object.pm Fri Apr 13 01:56:01 2007
@@ -0,0 +1,495 @@
+# *
+# *     Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org>
+# *                        Dirk-Willem van Gulik <dirkx@webweaving.org>
+# *
+# * NOTICE
+# *
+# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE'
+# * file you should have received together with this source code. If you did not get a
+# * a copy of such a license agreement you can pick up one at:
+# *
+# *     http://rdfstore.sourceforge.net/LICENSE
+# *
+# * Changes:
+# *     version 0.1 - 2005/06/09 at 16:21 CEST
+# *
+
+package RDFStore::Object;
+{
+use vars qw ($VERSION $AUTOLOAD);
+use strict;
+use Carp;
+ 
+$VERSION = '0.1';
+
+use RDFStore::Resource;
+use RDFStore::Model;
+use RDFStore::Vocabulary::RDF;
+use RDFStore::Vocabulary::RDFStoreContext;
+use RDFStore::Statement;
+
+@RDFStore::Object::ISA = qw( RDFStore::Resource ); # a bit property centric API now?!?
+
+# Default prefix table: maps a symbolic namespace prefix to a hash ref describing it.
+# Every active entry below carries a single 'namespace' key holding the namespace URI.
+# The commented example at the end shows the two optional keys, 'URI' and 'content_type';
+# define() uses that pair to fetch and parse a schema for the prefix.
+# new() copies this table into each object, so changes made through define() stay per-object.
+# NOTE: hopefully this will map URIs-to-URNs via DDDS I2C (or even better if user writes urn: like xmlns declarations :)
+# The mime-type is not used/negotiated; the extra 'content_type' hash key is used instead to specify it.
+%RDFStore::Object::default_prefixes = (
+        #'#default' => { 'namespace' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' }, #this is just handy
+        'rdf' => { 'namespace' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' },
+        'rdfs' => { 'namespace' => 'http://www.w3.org/2000/01/rdf-schema#' },
+        'rss' => { 'namespace' => 'http://purl.org/rss/1.0/' },
+        'daml' => { 'namespace' => 'http://www.daml.org/2001/03/daml+oil#' },
+        'dc' => { 'namespace' => 'http://purl.org/dc/elements/1.1/' },
+        'dcq' => { 'namespace' => 'http://purl.org/dc/terms/' },
+        'foaf' => { 'namespace' => 'http://xmlns.com/foaf/0.1/' },
+        'xsd' => { 'namespace' => 'http://www.w3.org/2001/XMLSchema#' },
+        'owl' => { 'namespace' => 'http://www.w3.org/2002/07/owl#' }
+	# Example of a real entry that also names a schema document to load:
+	# 'isc' => { 'namespace' => 'http://earth.esa.int/standards/showcase/',
+	#            'URI' => 'http://demo.asemantics.com/rdformer/isc/config/isc.rdf',
+	#	     'content_type' => 'RDF/XML' }
+        );
+
+# Constructor (adapts the RDFStore::Resource constructor signature).
+# Builds the wrapper hash with 'prefixes', 'schemas' and 'types' slots, copies every
+# default prefix entry into a fresh per-object hash, and stores the wrapped resource
+# under 'rdf_object'.
+# With a true $namespace the resource comes from SUPER::new( $namespace, $localname, $bnode );
+# otherwise a generated bNode is used - no error is raised for a missing name.
+sub new {
+	my ($pkg, $namespace, $localname, $bnode) = @_;
+
+	my $self = {
+		'prefixes' => {},
+		'schemas'  => {},
+		'types'    => []
+		};
+
+	# shallow per-object copy of each default prefix description
+	foreach my $prefix ( keys %RDFStore::Object::default_prefixes ) {
+		my $defaults = $RDFStore::Object::default_prefixes{ $prefix };
+		$self->{'prefixes'}->{ $prefix } = { %{ $defaults } };
+		};
+
+	if( $namespace ) {
+		my $resource = $pkg->SUPER::new( $namespace, $localname, $bnode );
+		$self->{'rdf_object'} = $resource;
+	} else {
+		my $bnode_resource = _createbNode( $self );
+		$self->{'rdf_object'} = $bnode_resource;
+		};
+
+	return bless $self, $pkg;
+	};
+
+# Helper that builds a generated (bNode) resource; kept separate because an
+# RDFStore::ObjectFactory is expected to take this job over at some point.
+# The hash passed in lazily receives 'bnodesCounter', 'timestamp' and 'rand_seed'.
+# The label is 'rdf:object:genidrdfstore' followed by 'S'<seed> 'P'<pid> 'T'<timestamp> 'N'<counter>,
+# and the counter is incremented after it has been used.
+sub _createbNode {
+	my ($class) = @_;
+
+	unless( exists $class->{'bnodesCounter'} ) {
+		$class->{'bnodesCounter'} = 0;
+		};
+	unless( exists $class->{'timestamp'} ) {
+		$class->{'timestamp'} = time();
+		};
+	unless( exists $class->{'rand_seed'} ) {
+		$class->{'rand_seed'} = unpack("H*", rand());
+		};
+
+	# try to generate a system/run wide unique ID
+	my $label = join( '',
+			'rdf:object:genidrdfstore',
+			'S', $class->{'rand_seed'},
+			'P', $$,
+			'T', $class->{'timestamp'},
+			'N', $class->{bnodesCounter}++ );
+
+	return RDFStore::Resource->new( $label, undef, 1 );
+	};
+
+# Export prefixes: for each given prefix an AUTOLOAD is installed into the package
+# named after that prefix. Calling <prefix>::<name> with a reference as first argument
+# forwards to set_or_get( "<prefix>:<name>", remaining arguments ) on that reference.
+# With a non-reference first argument the string "<prefix><name>" is returned.
+# NOTE(review): that string has no ':' separator - confirm whether this is intended.
+# Croaks when a prefix is not present in the object's 'prefixes' table.
+sub export {
+	my ($class, @prefixes) = @_;
+
+	foreach my $prefix ( @prefixes ) {
+		unless( exists $class->{'prefixes'}->{ $prefix } ) {
+			croak "Can't find prefix $prefix - perhaps need to call define() method before?";
+			};
+
+		my $dispatcher = sub {
+			my ($target, @values) = @_;
+
+			# strip the package part from the name that triggered AUTOLOAD
+			my $name = $AUTOLOAD;
+			$name =~ s/^.*:://o;
+
+			return "$prefix$name"
+				unless( ref($target) );
+
+			return $target->set_or_get( "$prefix:$name", @values );
+			};
+
+		# symbolic glob assignment needs strict refs switched off
+		no strict 'refs';
+		*{"$prefix\::AUTOLOAD"} = $dispatcher;
+		};
+	};
+
+# Delegates to getNamespace on the wrapped resource.
+sub getNamespace {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->getNamespace;
+	};
+
+# Delegates to getLocalName on the wrapped resource.
+sub getLocalName {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->getLocalName;
+	};
+
+# Delegates to getbNode on the wrapped resource.
+sub getbNode {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->getbNode;
+	};
+
+# Delegates to getLabel on the wrapped resource.
+sub getLabel {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->getLabel;
+	};
+
+# Delegates to getDigest on the wrapped resource.
+sub getDigest {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->getDigest;
+	};
+
+# Reports whether the wrapped resource is anonymous (delegates to isAnonymous).
+sub isbNode {
+	my ($class) = @_;
+
+	return $class->{'rdf_object'}->isAnonymous;
+	};
+
+# Returns the label of the wrapped resource when it is not anonymous.
+# bNodes do not have a URI, so for them nothing is returned.
+sub getURI {
+	my ($class) = @_;
+
+	my $resource = $class->{'rdf_object'};
+
+	unless( $resource->isAnonymous ) {
+		return $resource->getLabel;
+		};
+
+	return;
+	};
+
+# Returns the label of the wrapped resource when it is anonymous (a bNode);
+# for a named resource nothing is returned.
+sub getNodeID {
+	my ($class) = @_;
+
+	my $resource = $class->{'rdf_object'};
+
+	if( $resource->isAnonymous ) {
+		return $resource->getLabel;
+		};
+
+	return;
+	};
+
+# Set the context/provenance for the RDF object.
+# Current state: the method only makes sure the object is connected to a model;
+# the $context argument is accepted but not used yet, because the context handling
+# below is still commented out.
+sub setDomain {
+	my ($class, $context) = @_;
+
+	# lazily attach a model (connect() without arguments) when none is present
+	$class->connect
+		unless($class->isConnected);
+
+	# NOTE: need to think/fix the context/provenance story - also because when connect to another store what happens? another context or the same with
+	#       a different dc:date and dc:source? same ID? boooh....but very important especially when adding URN DDDS registration to this class :o)
+	## create rdfstore:Context for this RDF object
+	#$self->{'context'} = _createbNode( $self );
+	#$self->{'model'}->setContext( $self->{'context'} );
+	## add its triple to model
+	#$self->{'model'}->add( $self->{'context'}, $RDFStore::Vocabulary::RDF::type, $RDFStore::Vocabulary::RDFStoreContext::Context );
+
+	};
+
+# Zap any context/provenance info for the RDF object - possible!? needed???
+# Currently an empty placeholder: the body contains no statements.
+sub resetDomain {
+	};
+
+# Connect/associate the object with an RDFStore::Model and return that model.
+#   - RDFStore::Model instance: becomes the object's model; statements of a previously
+#     attached model are copied into it first (this can be very expensive)
+#   - any other true value: used as the 'Name' of a newly created model (physical store)
+#   - nothing/false: an in-memory model is created unless one is already attached
+sub connect {
+	my ($class, $model) = @_;
+
+	if( $model ) {
+		if( ref($model) and $model->isa("RDFStore::Model") ) {
+			if( exists $class->{'model'} ) {
+				# copy really stuff across - !!!! these operations can be very expensive !!!!
+				my $iterator = $class->{'model'}->elements;
+				while ( my $statement = $iterator->each ) {
+					$model->add( $statement );
+					};
+				};
+			$class->{'model'} = $model;
+		} else {
+			# name to physical store
+			$class->{'model'} = RDFStore::Model->new( 'Name' => $model );
+			};
+	} elsif( !(exists $class->{'model'}) ) {
+		# create an in-memory model unless one is already there
+		$class->{'model'} = RDFStore::Model->new;
+		};
+
+	return $class->{'model'};
+	};
+
+# Dis-connect/de-associate the object from its RDFStore::Model.
+# A new empty in-memory model takes its place; statements of the previously attached
+# model are copied across (can be very expensive) and the previous store is not cleaned up.
+# Returns the new model.
+sub disconnect {
+	my ($class) = @_;
+
+	my $detached = RDFStore::Model->new; #empty in-memory
+
+	if( exists $class->{'model'} ) {
+		my $iterator = $class->{'model'}->elements;
+		while ( my $statement = $iterator->each ) {
+			$detached->add( $statement );
+			};
+		};
+
+	$class->{'model'} = $detached;
+
+	return $class->{'model'};
+	};
+
+# Returns whatever is stored in the object's 'model' slot.
+sub connection {
+	my ($class) = @_;
+
+	return $class->{'model'};
+	};
+
+# True when the object has a 'model' slot (tested with exists, not defined).
+sub isConnected {
+	my ($class) = @_;
+
+	return ( exists $class->{'model'} );
+	};
+
+# Tie a bNode-identified RDF object to a well-known URI.
+# Unfinished: only the precondition is implemented - the method returns early unless
+# the wrapped resource is anonymous and $uri is not. The renaming itself is still a TODO.
+# $uri must be an object providing isAnonymous(); an undefined $uri would make that call fail.
+sub deanonymize {
+	my ($class, $uri) = @_;
+
+        return
+                unless( $class->{'rdf_object'}->isAnonymous and (! $uri->isAnonymous ) );
+
+	# TODO: fetch object, substitute its identifier with URI and re-ingest it...
+	};
+
+# Make the object a bNode, i.e. rename it to a randomized/generated rdf:nodeID.
+# Placeholder: the argument is unpacked but nothing else is done yet.
+sub anonymize {
+	my ($class) = @_;
+	};
+
+# Define prefixes, schemas and content_type for the RDF object
+# (shouldn't these be in RDFStore::Model then, like in Jena?)
+#
+#   $object->define( prefix => { 'namespace' => ..., 'URI' => ..., 'content_type' => ... }, ... )
+#
+# Each passed description replaces the object's entry for that prefix (the hash ref is
+# stored as given, not copied). When a description carries both 'URI' and 'content_type',
+# the schema behind the URI is parsed into a new RDFStore::Model and cached under the
+# entry's 'schema' key.
+# Returns 1 on success. Returns 0 as soon as one schema can not be loaded, after printing
+# the error to STDERR. Croaks when a description is not a hash reference.
+sub define {
+	my ($class, %prefixes) = @_;
+
+	# walk the prefix names only: a plain %prefixes list also yields the values,
+	# which used to create bogus 'HASH(0x...)' entries
+	foreach my $prefix ( keys %prefixes ) {
+		my $settings = $prefixes{ $prefix };
+
+		croak "Prefix '$prefix' must be described by a hash reference"
+			unless( ref($settings) and UNIVERSAL::isa($settings, 'HASH') );
+
+		# overridden ones
+		$class->{ 'prefixes' }->{ $prefix } = $settings;
+
+		my $URI = $settings->{ 'URI' };
+		my $content_type = $settings->{ 'content_type' };
+
+		# if the define is about a specific URI, parse it with the given content_type
+		# and keep it "cached" in the prefixes hash-table (how many schemas cachable??)
+		next
+			unless( $URI and $content_type );
+
+		my $schema;
+		eval {
+			$schema = RDFStore::Model->new; # we could keep a pool/registry as well with context/provenance for each schema...
+			$schema->setContext( $schema->getNodeFactory->createResource( $URI ) );
+			my $p = $schema->getReader( $content_type );
+			$p->parsefile( $URI );
+			};
+		if($@) {
+			print STDERR $@;
+			return 0;
+			};
+		$class->{ 'prefixes' }->{ $prefix }->{ 'schema' } = $schema;
+		};
+
+	return 1;
+	};
+
+# Load some RDF into the underlying model (not checking yet whether the input triples
+# actually relate to the RDF object). The object is connected to an in-memory model
+# first when it has none.
+#   $input  - an RDFStore::Model (its statements are copied), an IO::Handle (read as a
+#             stream), a URI object, or a plain string
+#   $syntax - passed to the model's getReader()
+# A plain string is read as a file/URL when it carries a URI scheme or names an existing
+# file; otherwise it is parsed as RDF text. (URI->new() always returns an object, so the
+# truth test used before never reached the readstring() branch.)
+# Returns nothing when no reader is available for $syntax or when $input is undefined.
+sub load {
+	my ($class, $input, $syntax) = @_;
+
+	$class->connect
+		unless($class->isConnected);
+
+	my $parser = $class->{'model'}->getReader($syntax);
+
+	return
+		unless($parser);
+
+	return
+		unless(defined $input);
+
+	if (ref($input) and UNIVERSAL::isa($input, 'RDFStore::Model')) {
+		my $elements = $input->elements;
+		while ( my $st = $elements->each ) {
+			$class->{'model'}->add( $st );
+			};
+	} elsif (ref($input) and UNIVERSAL::isa($input, 'IO::Handle')) {
+		$parser->readstream( $input );
+	} elsif (ref($input) and UNIVERSAL::isa($input, 'URI')) {
+		$parser->readfile( $input );
+	} else {
+		my $uri = URI->new( $input );
+		if( $uri and ( defined($uri->scheme) or -e $input ) ) {
+			$parser->readfile( $uri );
+		} else {
+			$parser->readstring( $input );
+			};
+		};
+	};
+
+# Combined accessor: with at least one value the first one is stored through
+# set( $property => value ) (any further values are ignored); without values the
+# result of get( $property ) is returned.
+sub set_or_get {
+	my ($class, $property, @vals) = @_;
+
+	return $class->get( $property )
+		unless( @vals );
+
+	my $value = shift @vals;
+
+	return $class->set( $property => $value );
+	};
+
+# Initialize the RDF object with a bunch of property-name/property-value pairs, using the
+# prefixes defined for the object:
+#
+#	$object->set( 'rdf:type' => $resource, 'dc:title' => "my title" )
+#
+# Property names must be written as "prefix:localname" with a known prefix; the first
+# name that can not be resolved is reported on STDERR and ends the call (pairs handled
+# before it stay added).
+# Values are mapped as follows:
+#	- RDFStore::Resource (RDFStore::Object included): used as object of the statement;
+#	  values of rdf:type are also remembered in the object's 'types' list
+#	- hash reference: a new bNode RDFStore::Object sharing the same model is created and
+#	  set() is called on it recursively with the hash content
+#	- array reference: same, with the items stored under 'rdf:_1', 'rdf:_2', ... 'rdf:_n'
+#	- anything else: a literal
+# NOTE(review): the RDF/S checks (rdfs:domain, rdfs:range, owl cardinalities) mentioned by
+# earlier documentation are not carried out by this code.
+sub set {
+	my ($class, %values) = @_;
+
+	$class->connect
+		unless($class->isConnected);
+
+	# pick up properties
+	foreach my $property_name ( keys %values ) {
+		# look up property QName for property and build a resource for it (actually here is all the RDFStore::Vocabulary biz which should use
+		# RDF objects instead of simple resources i.e. resources that know about their type information (even if more polymorphic in RDF :)
+		my ($prefix, $localname);
+		($prefix, $localname) = ($1, $2)
+			if( $property_name =~ m/^([^:]+):(.+)/ );
+
+		# only consult prefixes which are really defined, so that an unknown prefix
+		# is not autovivified into the prefix table
+		my $namespace;
+		if( 	(defined $prefix) and
+			(exists $class->{ 'prefixes' }->{ $prefix }) and
+			(ref($class->{ 'prefixes' }->{ $prefix })) ) {
+			$namespace = $class->{ 'prefixes' }->{ $prefix }->{ 'namespace' };
+			};
+		unless(defined $namespace) {
+			print STDERR "Can not set unknown property '$property_name'\n";
+			return;
+			};
+		my $factory = $class->{'model'}->getNodeFactory;
+		my $property = $factory->createResource( $namespace, $localname );
+
+		my $property_value = $values{ $property_name };
+
+		if( ref($property_value) and UNIVERSAL::isa($property_value, 'RDFStore::Resource') ) {
+			push @{ $class->{'types'} }, $property_value #save the rdf:type (polymorphism alike)
+				if($property->equals( $RDFStore::Vocabulary::RDF::type ));
+
+			# what happen when this is rdf:resource and two RDFStore::Object DBs are different? real linking?? :-)
+
+			$property_value = $property_value->{'rdf_object'} #keep on wrapping up rdf objects..
+				if( UNIVERSAL::isa($property_value, 'RDFStore::Object') );
+		} elsif( ref($property_value) =~ /HASH/ ) {
+			my $sub_values = $property_value;
+
+			# recursive on brand new untyped object which shares the same DB
+			$property_value = RDFStore::Object->new; #bNode for sure
+			$property_value->connect( $class->connection ); # kinda sharing the model (then provenance too???)
+
+			$property_value->set( %{$sub_values} ); #recursive call on the bNode created
+			$property_value = $property_value->{'rdf_object'}; #keep on wrapping up rdf objects..
+		} elsif( ref($property_value) =~ /ARRAY/ ) {
+			# target is a new container-like object sharing the same DB too
+			my $array_of_values = $property_value;
+
+			$property_value = RDFStore::Object->new; #bNode for sure
+			$property_value->connect( $class->connection ); # kinda sharing the model (then provenance too???)
+
+			# expand the items into rdf:_1, rdf:_2 ....rdf:_n
+			my %sub_values = ();
+			my $i = 1;
+			foreach my $item ( @{$array_of_values} ) {
+				$sub_values{ 'rdf:_' . $i++ } = $item;
+				};
+
+			$property_value->set( %sub_values );
+			$property_value = $property_value->{'rdf_object'}; #keep on wrapping up rdf objects..
+		} else {
+			#literal.. should add xml:lang and rdf:datatype ala Turtle syntax too
+			$property_value = $factory->createLiteral( $property_value );
+			};
+
+		#print "S='".$class->{'rdf_object'}->toString."' P='".$property->toString."' O='".$property_value->toString."'\n";
+		# add the statement finally
+		$class->{'model'}->add( $class->{'rdf_object'}, $property, $property_value ); #what about provenance here then?
+		};
+	};
+
+# Return the value(s) stored for a property of this object.
+#   $property_name - "prefix:localname", with a prefix known to the object
+# In list context all objects found are returned, in scalar context the first one.
+# Nothing is returned (and a message goes to STDERR) when the property name can not be
+# resolved; nothing is returned either when no node factory or property resource is available.
+# The object is connected to an in-memory model first when it has none.
+sub get {
+	my ($class, $property_name) = @_;
+
+	$class->connect
+		unless($class->isConnected);
+
+	my ($prefix, $localname);
+	($prefix, $localname) = ($1, $2)
+		if( (defined $property_name) and ($property_name =~ m/^([^:]+):(.+)/) );
+
+	# only consult prefixes which are really defined, so that an unknown prefix
+	# is not autovivified into the prefix table
+	my $namespace;
+	if( 	(defined $prefix) and
+		(exists $class->{ 'prefixes' }->{ $prefix }) and
+		(ref($class->{ 'prefixes' }->{ $prefix })) ) {
+		$namespace = $class->{ 'prefixes' }->{ $prefix }->{ 'namespace' };
+		};
+	unless(defined $namespace) {
+		print STDERR "Can not get unknown property '$property_name'\n";
+		return;
+		};
+
+	my $factory = $class->{'model'}->getNodeFactory;
+
+	return
+		unless($factory);
+
+	my $property = $factory->createResource( $namespace, $localname );
+
+	return
+		unless($property);
+
+	my $values = $class->{'model'}->find( $class->{'rdf_object'}, $property )->elements;
+
+	#print "FOUND '".$values->size."' values for '".$property->toString."'\n";
+
+	my @values;
+	while( my $object = $values->each_object ) {
+		push @values, $object;
+		};
+
+	return wantarray ? @values : $values[0];
+	};
+
+# Alias for serialize(): forwards all arguments and returns its result.
+sub dump {
+	my ($class, @args) = @_;
+
+	return $class->serialize( @args );
+	};
+
+# Serialize the underlying model; returns whatever the model's serialize() returns.
+#   $fh, $syntax, $base - passed through to the model
+#   $namespaces         - optional hash ref of namespace URI => prefix; its mappings are
+#                         added to (and win over) the ones derived from the object's prefixes
+#                         NOTE(review): shape assumed to match the table built here - confirm
+# The namespace table handed to the model maps namespace URI => prefix. Prefix entries
+# without a defined namespace are left out instead of producing an empty-key mapping.
+# The object is connected to an in-memory model first when it has none.
+sub serialize {
+	my ($class, $fh, $syntax, $namespaces, $base ) = @_;
+
+	$class->connect
+		unless($class->isConnected);
+
+	my %namespaces = ();
+	foreach my $prefix ( keys %{ $class->{'prefixes'} } ) {
+		my $entry = $class->{'prefixes'}->{ $prefix };
+
+		next
+			unless( ref($entry) and defined $entry->{'namespace'} );
+
+		$namespaces{ $entry->{'namespace'} } = $prefix;
+		};
+
+	# caller supplied mappings used to be ignored silently
+	if( ref($namespaces) and UNIVERSAL::isa($namespaces, 'HASH') ) {
+		foreach my $namespace ( keys %{ $namespaces } ) {
+			$namespaces{ $namespace } = $namespaces->{ $namespace };
+			};
+		};
+
+	return $class->{'model'}->serialize( $fh, $syntax, \%namespaces, $base );
+	};
+
+1;
+};
+
+__END__
+
+=head1 NAME
+
+RDFStore::Object - A very useful abstraction over an RDFStore::Model
+
+=head1 SYNOPSIS
+
+	use RDFStore::Object;
+
+=head1 DESCRIPTION
+
+A "RDF object" wrapper around RDFStore::Model
+
+=head1 SEE ALSO
+
+RDFStore::Model(3) Class::RDF(3)
+
+=head1 ABOUT RDF Objects
+
+ http://www.hpl.hp.com/techreports/2002/HPL-2002-315.pdf
+
+=head1 AUTHOR
+
+	Alberto Reggiori <areggiori@webweaving.org>

Added: incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser.pm
URL: http://svn.apache.org/viewvc/incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser.pm?view=auto&rev=528394
==============================================================================
--- incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser.pm (added)
+++ incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser.pm Fri Apr 13 01:56:01 2007
@@ -0,0 +1,346 @@
+# *
+# *     Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org>
+# *                        Dirk-Willem van Gulik <dirkx@webweaving.org>
+# *
+# * NOTICE
+# *
+# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE'
+# * file you should have received together with this source code. If you did not get a
+# * a copy of such a license agreement you can pick up one at:
+# *
+# *     http://rdfstore.sourceforge.net/LICENSE
+# *
+# * Changes:
+# *     version 0.1 - Tue Dec 16 00:51:44 CET 2003
+# *     version 0.2
+# *		- updated wget() adding Accept: HTTP header and use LWP::UserAgent if available
+# *
+
+package RDFStore::Parser;
+{
+use vars qw ( $VERSION %Built_In_Styles );
+use strict;
+ 
+$VERSION = '0.2';
+
+use Carp;
+
+# Probe once, at load time, whether LWP::UserAgent can be loaded; wget() picks its
+# transport from this flag (1 = available, 0 = not available).
+$RDFStore::Parser::hasLWPUserAgent = ( eval { require LWP::UserAgent; 1 } ) ? 1 : 0;
+
+# Constructor: the named arguments become the parser object itself.
+# Recognised arguments handled here:
+#   Style             - name of a style package; a name without '::' must be a key of
+#                       %Built_In_Styles and is looked up below RDFStore::Parser::NTriples::
+#   Handlers          - hash ref of handler type => code ref; explicit handlers win over
+#                       the ones found in the style package
+#   Non_Expat_Options - hash ref, extended with the option names private to this class
+# The style package is loaded with 'use'; for each handler type a sub named exactly like
+# the type, or its all-lowercase version, is taken from that package.
+# Croaks on an undefined built-in style, on a malformed style package name and when the
+# style package can not be loaded (the old code meant to warn and exit(1) there, but
+# dereferenced the class name instead, which is fatal under 'strict refs').
+sub new {
+	my ($pkg, %args) = @_;
+
+	my $style = $args{Style};
+
+	my $nonexopt = $args{Non_Expat_Options} ||= {};
+
+	foreach my $option ( qw( Style Non_Expat_Options Handlers _HNDL_TYPES ) ) {
+		$nonexopt->{ $option } = 1;
+		};
+
+	# handler types known to the parser (manage_bNodes is used only by the RDF/XML SiRPAC parser)
+	$args{_HNDL_TYPES} = {};
+	foreach my $htype ( qw( Init Assert Start_XML_Literal Stop_XML_Literal Char_Literal manage_bNodes Final ) ) {
+		$args{_HNDL_TYPES}->{ $htype } = 1;
+		};
+
+	$args{'warnings'} = [];
+
+	$args{'Handlers'} ||= {};
+	my $handlers = $args{'Handlers'};
+	if (defined($style)) {
+		my $stylepkg = $style;
+		if ($stylepkg !~ /::/) {
+			$stylepkg = "\u$style";
+			croak "Undefined style: $style"
+				unless defined($Built_In_Styles{$stylepkg});
+			$stylepkg = 'RDFStore::Parser::NTriples::' . $stylepkg;
+			};
+
+		# the name ends up in a string eval below: accept plain package names only
+		croak "Invalid parser style package name '$stylepkg'"
+			unless( $stylepkg =~ m/^[A-Za-z_]\w*(?:::\w+)*$/ );
+
+		# load the requested style
+		eval "use $stylepkg;";
+		if($@) {
+			croak "Cannot load parser style '$stylepkg': $@";
+			};
+
+		foreach my $htype (keys %{$args{_HNDL_TYPES}}) {
+			# Handlers explicitly given override handlers from the Style package
+			next
+				if( defined($handlers->{$htype}) );
+
+			# A handler in the style package must either have exactly the right
+			# case as the type name or a completely lower case version of it.
+			foreach my $hname ( "${stylepkg}::$htype", "${stylepkg}::\L$htype" ) {
+				if (defined(&$hname)) {
+					$handlers->{$htype} = \&$hname;
+					last;
+					};
+				};
+			};
+		};
+	$args{Pkg} ||= caller;
+
+	$args{'options'} = {};
+
+	$args{'_Source'} = 'STDIN:';
+
+	bless \%args, $pkg;
+	};
+
+# Store $value under $name in the parser's 'options' table.
+sub setProperty {
+	my ($class, $name, $value) = @_;
+
+	my $options = $class->{'options'};
+
+	$options->{ $name } = $value;
+	};
+
+# Return the value stored under $name in the parser's 'options' table.
+sub getProperty {
+	my ($class, $name) = @_;
+
+	my $options = $class->{'options'};
+
+	return $options->{ $name };
+	};
+
+# Install handlers given as a flat list of type => handler pairs.
+# Returns a flat list of type => previous handler pairs, in the order processed.
+# Croaks on an odd number of arguments and on a type missing from _HNDL_TYPES
+# (pairs handled before the unknown type stay installed).
+sub setHandlers {
+	my ($class, @handler_pairs) = @_;
+
+	croak("Uneven number of arguments to setHandlers method")
+		if( scalar(@handler_pairs) % 2 );
+
+	my @previous;
+	while ( my ($type, $handler) = splice(@handler_pairs, 0, 2) ) {
+		unless( defined($class->{_HNDL_TYPES}->{$type}) ) {
+			my @types = sort keys %{$class->{_HNDL_TYPES}};
+			croak("Unknown Parser handler type: $type\n Valid types are : @types");
+			};
+
+		push(@previous, $type, $class->{Handlers}->{$type});
+		$class->{Handlers}->{$type} = $handler;
+		};
+
+	return @previous;
+	};
+
+# Remember the file name or URI being parsed. An undefined argument leaves the
+# stored source untouched. The argument is returned as given.
+sub setSource {
+	my ($class, $file_or_uri) = @_;
+
+	if( defined $file_or_uri ) {
+		$class->{'_Source'} = $file_or_uri;
+		};
+
+	return $file_or_uri;
+	};
+
+# Return the stored source (file name or URI).
+sub getSource {
+	my ($class) = @_;
+
+	return $class->{'_Source'};
+	};
+
+# Parsing entry points. In this class they are empty placeholders which do nothing.
+# read(), readstring() and readstream() forward to them.
+# NOTE(review): concrete parsers are presumably expected to override these - confirm against the subclasses.
+sub parse { };
+
+sub parsestring { };
+
+sub parsestream { };
+
+# Base implementation: only records the first argument as the current source
+# via setSource() and returns its result.
+sub parsefile {
+	my ($class, $source) = @_;
+
+	return $class->setSource( $source );
+	};
+
+# Alias: forwards all arguments to parse() and returns its result.
+sub read {
+	my ($class, @args) = @_;
+
+	return $class->parse( @args );
+	};
+
+# Alias: forwards all arguments to parsestring() and returns its result.
+sub readstring {
+	my ($class, @args) = @_;
+
+	return $class->parsestring( @args );
+	};
+
+# Alias: forwards all arguments to parsestream() and returns its result.
+sub readstream {
+	my ($class, @args) = @_;
+
+	return $class->parsestream( @args );
+	};
+
+# Alias: forwards all arguments to parsefile() and returns its result.
+sub readfile {
+	my ($class, @args) = @_;
+
+	return $class->parsefile( @args );
+	};
+
+# Fetch the content behind a URI over HTTP and return it as a string.
+#   $uri - a URI object; anything else makes the method croak
+# LWP::UserAgent is used when it could be loaded, otherwise a minimal HTTP client on
+# top of IO::Socket::INET. RDFStore::Parser::SiRPAC parsers also send an Accept header
+# asking for RDF/XML. Problems are appended to the parser's 'warnings' list and nothing
+# is returned (undef when the socket can not be opened).
+# Fixes over the previous version:
+#   - the LWP branch checks the response status instead of the always-true response object
+#   - no empty header line is emitted when there is no Accept header (it used to end
+#     the HTTP/1.1 request before "Connection: close")
+#   - the status of the HTTP/1.1 retry is read from $1/$2 (the pattern has two groups only)
+#   - a followed redirect returns the fetched content instead of treating it as a socket
+sub wget {
+	my ($class,$uri) = @_;
+
+	croak "RDFStore::Parser::wget: input url is not an instance of URI"
+		unless( (defined $uri) && UNIVERSAL::isa($uri, "URI") );
+
+	# kept for the 'warnings' bookkeeping when invoked on a class name rather than an object
+	no strict 'refs';
+
+	my $accept = ( $class->isa("RDFStore::Parser::SiRPAC") )
+		? 'application/rdf+xml,application/xml;q=0.9,*/*;q=0.5'
+		: undef;
+
+	if($RDFStore::Parser::hasLWPUserAgent) {
+		# HTTP GET it
+		my $ua = LWP::UserAgent->new( timeout => 60 );
+
+		my %headers = ( "User-Agent" => "rdfstore\@asemantics.com/$VERSION" );
+		$headers{'Accept'} = $accept
+			if(defined $accept);
+
+		my $response = $ua->get( $uri->as_string, %headers );
+
+		unless( $response and $response->is_success ) {
+			my $msg = "RDFStore::Parser::wget: Cannot HTTP GET ".$uri->as_string."\n";
+			push @{ $class->{warnings} },$msg;
+			return;
+			};
+
+		return $response->content;
+		};
+
+	require IO::Socket;
+
+	local($^W) = 0;
+	my $sock = IO::Socket::INET->new(	PeerAddr => $uri->host,
+						PeerPort => $uri->port,
+						Proto    => 'tcp',
+						Timeout  => 60) || return undef;
+	$sock->autoflush;
+	my $netloc = $uri->host;
+	$netloc .= ":".$uri->port if $uri->port != 80;
+
+	my $path = $uri->as_string;
+
+	my @accept_header = (defined $accept) ? ( "Accept: $accept" ) : ();
+
+	#HTTP/1.0 GET request
+	print $sock join("\015\012" =>
+		"GET $path HTTP/1.0",
+		"Host: $netloc",
+		"User-Agent: rdfstore\@asemantics.com/$VERSION",
+		@accept_header,
+		"", "");
+
+	my $line = <$sock>;
+	$line = ''
+		unless(defined $line);
+
+	if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) {
+		my $msg = "RDFStore::Parser::wget: (10 Did not get HTTP/x.x header back...$line";
+		push @{ $class->{warnings} },$msg;
+		warn $msg;
+		return;
+		};
+	my ($status, $reason) = ($1, $2);
+	if ( ($status != 200) && ($status != 302) ) {
+		my $msg = "Error MSG returned from server: $status $reason\n";
+		push @{ $class->{warnings} },$msg;
+
+		#try HTTP/1.1 GET request
+		# NOTE(review): the retry is sent on the socket of the failed request - confirm servers keep it usable
+		print $sock join("\015\012" =>
+			"GET $path HTTP/1.1",
+			"Host: $netloc",
+			"User-Agent: rdfstore\@asemantics.com/$VERSION",
+			@accept_header,
+			"Connection: close",
+			"", "");
+
+		$line = <$sock>;
+		$line = ''
+			unless(defined $line);
+
+		if ($line !~ m,^HTTP/\d+\.\d+\s+(\d\d\d)\s+(.+)$,m) {
+			my $msg = "RDFStore::Parser::wget: Did not get HTTP/x.x header back...$line";
+			push @{ $class->{warnings} },$msg;
+			warn $msg;
+			return;
+			};
+		($status, $reason) = ($1, $2);
+
+		if ( ($status != 200) && ($status != 302) ) {
+			my $msg = "RDFStore::Parser::wget: Error MSG returned from server: $status $reason\n";
+			push @{ $class->{warnings} },$msg;
+			return;
+			};
+		};
+
+	# read the response headers, following a Location header not seen before
+	while( my $header = <$sock> ) {
+		$header =~ s/[\015\012]+$//;
+
+		last
+			if( $header =~ m/^\s*$/ ); # blank line: end of headers
+
+		next
+			unless( $header =~ m,^Location:\s*(.*)$,i );
+		my $location = $1;
+
+		# the same location is not followed twice, which avoids endless redirect loops
+		next
+			if( (defined $class->{HTTP_Location}) && ($class->{HTTP_Location} eq $location) );
+
+		$class->{HTTP_Location} = $location;
+		my $redirected = $class->wget( URI->new( $location ) );
+
+		return $redirected
+			if(defined $redirected);
+		last;
+		};
+
+	my $content='';
+	while( my $chunk = <$sock> ) {
+		$content.=$chunk;
+		};
+
+	return $content;
+	};
+
+1;
+};
+
+__END__
+
+=head1 NAME
+
+RDFStore::Parser - Interface to an RDF parser
+
+=head1 SYNOPSIS
+
+	use RDFStore::Parser;
+
+	my $parser = new RDFStore::Parser(
+			ErrorContext => 3, 
+                        Style => 'RDFStore::Parser::Styles::RDFStore::Model'
+			);
+
+	# or...
+	use RDFStore::Model;
+
+	my $model= new RDFStore::Model();
+	$parser = $model->getReader;
+
+	my $rdfstring = qq|
+
+<rdf:RDF
+        xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'
+        xmlns:a='http://description.org/schema/'>
+<rdf:Description rdf:about='http://www.w3.org'>
+        <a:Date>1998-10-03T02:27</a:Date>
+</rdf:Description>
+
+</rdf:RDF>|;
+
+	$model = $parser->parsestring($rdfstring);
+	$model = $parser->parsefile('http://www.w3.org/1999/02/22-rdf-syntax-ns#');
+	$model = $parser->parsestream(*RDFSTREAM);
+
+=head1 DESCRIPTION
+
+An RDFStore::Model parser.
+
+=head1 SEE ALSO
+
+RDFStore::Model(3) RDFStore::Parser::SiRPAC(3) RDFStore::Parser::NTriples(3)
+
+=head1 AUTHOR
+
+	Alberto Reggiori <areggiori@webweaving.org>

Added: incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser/NTriples.pm
URL: http://svn.apache.org/viewvc/incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser/NTriples.pm?view=auto&rev=528394
==============================================================================
--- incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser/NTriples.pm (added)
+++ incubator/triplesoup/donations/TRIPLES-3-RDFStore/lib/RDFStore/Parser/NTriples.pm Fri Apr 13 01:56:01 2007
@@ -0,0 +1,614 @@
+# *
+# *     Copyright (c) 2000-2006 Alberto Reggiori <areggiori@webweaving.org>
+# *                        Dirk-Willem van Gulik <dirkx@webweaving.org>
+# *
+# * NOTICE
+# *
+# * This product is distributed under a BSD/ASF like license as described in the 'LICENSE'
+# * file you should have received together with this source code. If you did not get a
+# * a copy of such a license agreement you can pick up one at:
+# *
+# *     http://rdfstore.sourceforge.net/LICENSE
+# *
+# * Changes:
+# *	version 0.1 - Tue Apr  8 00:28:24 CEST 2003
+# *	version 0.2
+# *		- updated wget() method invocation
+# *
+
+package RDFStore::Parser::NTriples;
+{
+	use vars qw($VERSION %Built_In_Styles $RDF_SYNTAX_NS $RDFMS_parseType_Literal);
+	use strict;
+	use Carp qw(carp croak cluck confess);
+	use URI;
+	use URI::Escape;
+	
+	use RDFStore::Util::UTF8 qw( cp_to_utf8 );
+
+	use RDFStore::Parser;
+	@RDFStore::Parser::NTriples::ISA = qw( RDFStore::Parser );
+
+# Compile-time setup: load Expat, set the module version and refuse to load
+# unless an XML::Parser::Expat 2.x release is installed.
+# NOTE(review): the change log in the file header mentions version 0.2 while
+# $VERSION is set to '0.1' here - confirm which one is intended.
+BEGIN {
+	require XML::Parser::Expat;
+    	$VERSION = '0.1';
+    	croak "XML::Parser::Expat.pm version 2 or higher is needed to process rdf:parseType='Literal' XML content"
+		unless $XML::Parser::Expat::VERSION =~ /^2\./; # only version strings starting with "2." are accepted
+	};
+
+# RDF syntax namespace and the rdf:XMLLiteral datatype URI derived from it;
+# _pp_NTriple() compares literal datatypes against the latter.
+$RDFStore::Parser::NTriples::RDF_SYNTAX_NS="http://www.w3.org/1999/02/22-rdf-syntax-ns#";
+$RDFStore::Parser::NTriples::RDFMS_parseType_Literal = $RDFStore::Parser::NTriples::RDF_SYNTAX_NS . "XMLLiteral";
+
+# Constructor: hands every argument to the parent class constructor and
+# blesses the object it returns into the invoking package.
+sub new {
+	my $pkg = shift;
+
+	return bless( $pkg->SUPER::new(@_), $pkg );
+	};
+
+# Parse N-Triples input given either as a string or as an open filehandle.
+#
+# Arguments (after the invocant):
+#   $arg         - a string holding N-Triples statements, or an open
+#                  filehandle (IO::Handle object or glob) to read them from
+#   $file_or_uri - optional URI object or existing local file name; it is used
+#                  as the source only when no 'Source' option is already set
+#
+# The 'Init' handler (if any) is called before parsing and the 'Final' handler
+# (if any) afterwards. Returns the value of the 'Final' handler when one is
+# installed, otherwise the result of the last statement processed.
+# Croaks when no valid NodeFactory is configured or when parsing fails.
+sub parse {
+	my $class = shift;
+
+	$class->SUPER::parse( @_ );
+
+	my $arg = shift;
+	my $file_or_uri = shift;
+
+	# seed the genid counter from the GenidNumber option when it is a non-zero integer
+	$class->{iReificationCounter}= ( ($class->{GenidNumber}) && (int($class->{GenidNumber})) ) ? $class->{GenidNumber} : 0;
+
+	# turn the source into a URI object: an explicit 'Source' option wins over $file_or_uri
+	if(	(exists $class->{Source}) &&
+			(defined $class->{Source}) &&
+			( (!(ref($class->{Source}))) || (!($class->{Source}->isa("URI"))) )	) {
+		if(-e $class->{Source}) {
+			$class->{Source}=URI->new('file:'.$class->{Source});
+		} else {
+			$class->{Source}=URI->new($class->{Source});
+		};
+	} elsif(defined $file_or_uri) {
+		if( (ref($file_or_uri)) && ($file_or_uri->isa("URI")) ) {
+			$class->{Source}=$file_or_uri;
+		} elsif(-e $file_or_uri) {
+			$class->{Source}=URI->new('file:'.$file_or_uri);
+		} else {
+			$class->{Source}=undef; #unknown
+		};
+	};
+	if(	(exists $class->{Source}) &&
+		(defined $class->{Source}) ) {
+		$class->{sSource}= $class->setSource(
+			(	(ref($class->{Source})) &&
+				($class->{Source}->isa("URI")) ) ? $class->{Source}->as_string :
+				$class->{Source} );
+		};
+
+	croak "Missing NodeFactory"
+		unless(	(defined $class->{NodeFactory}) &&
+			($class->{NodeFactory}->isa("RDFStore::NodeFactory")) );
+	$class->{nodeFactory} = $class->{NodeFactory};
+	$class->{Warnings} = ( defined $class->{Warnings} && $class->{Warnings} =~ m/off|0|no|hide/ ) ? 0 : 1; #default is on
+
+	# declare first, fill conditionally: "my ... if ..." has undefined behaviour
+	my %handlers;
+	%handlers = %{$class->{Handlers}}
+		if( (defined $class->{Handlers}) && (ref($class->{Handlers}) =~ /HASH/) );
+
+	my $init = delete $handlers{Init};
+	my $final = delete $handlers{Final};
+
+	#Trigger 'Init' event
+	$init->($class)
+		if defined($init);
+
+	# work out whether $arg is a filehandle; anything else is parsed as a string
+	my $result;
+	my $ioref;
+	if (defined $arg) {
+		if (ref($arg) and UNIVERSAL::isa($arg, 'IO::Handle')) {
+			$ioref = $arg;
+		} else {
+			# a plain string dies here under 'strict refs' and leaves $ioref undefined
+			$ioref = eval { *{$arg}{IO} };
+			};
+		};
+
+	eval {
+		if (defined($ioref)) {
+			my $delim = $class->{Stream_Delimiter};
+
+			# localised so that the previous record separator is restored
+			# even when a statement fails to parse
+			local $/ = "\n$delim\n"
+				if defined($delim);
+
+			# read into a lexical so the caller's $_ is left untouched
+			while ( defined( my $line = <$arg> ) ) {
+				$result = $class->_pp_NTriple($line);
+				};
+		} else {
+			# one statement per chunk; the terminating full stop eaten by split() is put back
+			foreach my $statement ( split(/\.[\n\r]+/, $arg ) ) {
+				$result = $class->_pp_NTriple( $statement . ' . ' );
+				};
+			};
+		};
+
+	my $err = $@;
+	if($err) {
+		croak $err;
+		};
+
+	if (defined $final) {
+		#Trigger 'Final' event
+		$result = $final->($class);
+		};
+
+	return $result;
+	};
+
+# see http://www.w3.org/TR/rdf-testcases/#ntriples and http://robustai.net/sailor/grammar/Quads.html
+# some basic parsing - updated version of http://aspn.activestate.com/ASPN/Mail/Message/787168
+#
+# Parse one N-Triples statement (optionally followed by a context node, i.e.
+# a quad) and pass the resulting nodes to addTriple().
+#
+# Arguments:
+#   $ntriple - the statement text, including its trailing full stop
+#
+# Returns nothing for comment and blank lines, otherwise whatever addTriple()
+# returns. Dies with a descriptive message on non-ASCII input or on any
+# syntax error.
+sub _pp_NTriple {
+	my ($class, $ntriple) = @_;
+
+	chomp( $ntriple );
+	$ntriple =~ s/^[\x20\x09]+//; # remove leading white space
+	$ntriple =~ s/[\x20\x09]+$//; # remove trailing white space
+
+	return if($ntriple =~ /^#/); # skip comments
+	return unless ($ntriple =~/\S/); # skip empty lines
+
+	# the offending character is captured explicitly instead of relying on $&
+	if ($ntriple =~ m/([^\x20-\x7e\x0d\x0a\x09])/) {
+		die 'Invalid character(s) found at "'.$1.'" in "'.$ntriple.'"';
+		};
+
+	unless ($ntriple =~ s/\.\s?$//) {
+		die 'Syntax error: missing trailing full stop in "'.$ntriple.'"';
+		};
+
+	# Unicode un-escape: \U takes exactly 8 hex digits and \u exactly 4, so
+	# hex-looking characters following a \uXXXX escape are left alone
+	# NOTE: other escapes (\\ \" \n \r \t) are still not un-escaped here
+	$ntriple =~ s/\\(?:U([0-9a-fA-F]{8})|u([0-9a-fA-F]{4}))/cp_to_utf8(hex(defined($1) ? $1 : $2))/eg;
+
+	# language tag: a primary tag followed by any number of '-' separated subtags
+	my $lang_re = qr/[a-z0-9]+(?:-[a-z0-9]+)*/;
+	my $xml_literal = $RDFStore::Parser::NTriples::RDFMS_parseType_Literal;
+
+	my ($subject, $predicate, $object, $context );
+
+	# parse subject
+	if ($ntriple =~ s/^<([^>]*)>[\x20\x09]+//) {
+		# uriref
+		$subject = $class->{nodeFactory}->createResource( $1 );
+	} elsif ($ntriple =~  s/^_:([A-Za-z][A-Za-z0-9]*)[\x20\x09]+//) {
+		# bNode
+		$subject = $class->{nodeFactory}->createbNode( $1 );
+	} else {
+		die 'Syntax error in <subject> token in "'.$ntriple.'"';
+		};
+
+	# parse predicate
+	if ($ntriple =~  s/^<([^>]*)>[\x20\x09]+//) {
+		# uriref
+		$predicate = $class->{nodeFactory}->createResource( $1 );
+	} elsif ($ntriple =~  s/^_:([A-Za-z][A-Za-z0-9]*)[\x20\x09]+//) { # we allow bArcs (with warning)
+		warn "found bArcs in ntriple" if($class->{Warnings});
+		# bNode
+		$predicate = $class->{nodeFactory}->createbNode( $1 );
+	} else {
+		die 'Syntax error in <predicate> token in "'.$ntriple.'"';
+		};
+
+	# parse object
+	if ($ntriple =~  s/^<([^>]*)>[\x20\x09]*//) {
+		# uriref
+		$object = $class->{nodeFactory}->createResource( $1 );
+	} elsif ($ntriple =~  s/^_:([A-Za-z][A-Za-z0-9]*)[\x20\x09]*//) {
+		# bNode
+		$object = $class->{nodeFactory}->createbNode( $1 );
+	} elsif ($ntriple =~  s/^"(.*)"\@($lang_re)\^\^<([^>]*)>[\x20\x09]*//s) { #we need to treat the string as single-line for XML
+		# literal with language and datatype
+		my ($text, $lang, $datatype) = ($1, $2, $3);
+		# second argument flags parseType='Literal' content
+		$object = $class->{nodeFactory}->createLiteral( $text, ( ( $datatype eq $xml_literal ) ? 1 : undef ), $lang, $datatype );
+	} elsif ($ntriple =~  s/^"(.*)"\^\^<([^>]*)>[\x20\x09]*//s) {
+		# literal with datatype only
+		my ($text, $datatype) = ($1, $2);
+		$object = $class->{nodeFactory}->createLiteral( $text, ( ( $datatype eq $xml_literal ) ? 1 : undef ), undef, $datatype );
+	} elsif ($ntriple =~  s/^"(.*)"\@($lang_re)[\x20\x09]*//s) {
+		# literal with language only
+		$object = $class->{nodeFactory}->createLiteral( $1, undef, $2 );
+	} elsif ($ntriple =~  s/^"(.*)"[\x20\x09]*//s) {
+		# plain literal
+		$object = $class->{nodeFactory}->createLiteral( $1 );
+	} else {
+		die 'Syntax error in <object> token in "'.$ntriple.'"';
+		};
+
+	if ( length($ntriple) ) {
+		# parse context (Quads actually see http://robustai.net/sailor/grammar/Quads.html)
+		if ($ntriple =~ s/^<([^>]*)>[\x20\x09]*//) {
+			# uriref
+			$context = $class->{nodeFactory}->createResource( $1 );
+		} elsif ($ntriple =~  s/^_:([A-Za-z][A-Za-z0-9]*)[\x20\x09]*//) {
+			# bNode
+			$context = $class->{nodeFactory}->createbNode( $1 );
+		} elsif ($ntriple !~  s/^\s*\.//) { # we could have more N-Triples in the same string
+			die 'Trash found after <object> token in "'.$ntriple.'"'; # should really say 'Syntax error in <context> token' :-)
+			};
+		};
+
+	return $class->addTriple( $subject, $predicate, $object, $context );
+	};
+
+# Accessor: returns the current value of the genid counter kept on the parser.
+sub getReificationCounter {
+	my ($class) = @_;
+
+	return $class->{iReificationCounter};
+	};
+
+# Parse N-Triples held in a string.
+#
+# The parent class parsestring() is called with the full argument list first;
+# the string is then passed to parse() with an undefined file/URI argument,
+# and any remaining arguments follow it.
+# NOTE(review): because of the explicit undef, a URIBASE passed as the second
+# argument does not reach parse() as its $file_or_uri - confirm this is intended.
+sub parsestring {
+	my $class = shift;
+
+	$class->SUPER::parsestring( @_ );
+
+	my ($string, @remaining) = @_;
+
+	return $class->parse( $string, undef, @remaining );
+	};
+
+# Parse N-Triples from an open stream.
+#
+# Calls the parent class parsestream() with the full argument list, then
+# delegates to parse() with the stream, the namespace and any further
+# arguments. Errors raised by parse() are re-thrown with croak.
+sub parsestream {
+	my $class = shift;
+
+	$class->SUPER::parsestream( @_ );
+
+	my ($arg, $namespace, @remaining) = @_;
+
+	my $ret = eval { $class->parse( $arg, $namespace, @remaining ) };
+	if ( my $err = $@ ) {
+		croak $err;
+		};
+
+	return $ret;
+	};
+
+# Parse N-Triples from a local file or from a URL.
+#
+# Arguments (after the invocant):
+#   $file - a local file name, a file: URI or any other URL; further
+#           arguments are passed on to parsestring()/parse()
+#
+# Non-file URLs are fetched with wget() and parsed as a string; local files
+# are opened read-only and parsed as a stream. Returns the parse result.
+# Croaks when the content cannot be fetched or opened, or when parsing fails.
+# Nothing is parsed when $file is undefined or empty.
+sub parsefile {
+	my $class = shift;
+
+	$class->SUPER::parsefile( @_ );
+
+	my $file = shift;
+
+	if( (defined $file) && ($file ne '') ) {
+		my $ret;
+		my $file_uri;
+		my $scheme;
+		# existing files and scheme-less names are treated as local files
+		$scheme='file:'
+			if( (-e $file) || (!($file =~ /^\w+:/)) );
+		$file_uri= URI->new(((defined $scheme) ? $scheme : '' ).$file);
+		if (	(defined $file_uri) && (defined $file_uri->scheme)	&&
+			($file_uri->scheme ne 'file') ) {
+			my $content = $class->wget($file_uri);
+			if(defined $content) {
+				eval {
+					$ret = $class->parsestring($content, $file_uri,@_);
+					};
+				my $err = $@;
+				croak $err
+					if $err;
+			} else {
+				croak "Cannot fetch '$file_uri'";
+				};
+		} else {
+			my $filename= $file_uri->file;
+
+			# a three-argument open() given undef would silently open a temporary file
+			croak "Couldn't map '$file_uri' to a local file name"
+				unless defined $filename;
+
+			# FIXME: it might be wrong in some cases
+			local(*FILE);
+			# explicit read mode: characters such as '>' or '|' in the
+			# file name must never be interpreted as an open mode
+			open(FILE, '<', $filename)
+				or  croak "Couldn't open $filename:\n$!";
+			binmode(FILE);
+			eval {
+				$ret = $class->parse(*FILE,$file_uri,@_);
+				};
+			my $err = $@;
+			close(FILE);
+			croak $err
+				if $err;
+			};
+		return $ret;
+		};
+	};
+
+# Build a statement from the given nodes and pass it to the 'Assert' handler.
+#
+# Arguments (after the invocant):
+#   $subject, $predicate, $object - nodes created by the node factory
+#   $context                      - optional context node
+#
+# Returns whatever the 'Assert' handler returns, or nothing when no such
+# handler is registered.
+sub addTriple {
+	my ($class,$subject,$predicate,$object,$context) = @_;
+
+#print STDERR "addTriple('".$subject->toString."','".$predicate->toString."','".$object->toString."'".( ($context) ? ",'".$context->toString."'" : '' ).")",((caller)[2]),"\n";
+
+	# If there is no subject (about=""), then use the URI/filename where the RDF description came from
+	$subject = $class->{nodeFactory}->createResource($class->{sSource})
+		unless( (defined $subject) && ($subject->toString()) && (length($subject->toString())>0) );
+
+	#Trigger 'Assert' event
+	# declared separately: "my ... if ..." has undefined behaviour
+	my $assert;
+	$assert = $class->{Handlers}->{Assert}
+		if(ref($class->{Handlers}) =~ /HASH/);
+
+	return
+		unless defined($assert);
+
+	return $assert->( $class, $class->{nodeFactory}->createStatement($subject,$predicate,$object,$context) );
+	};
+
+# Return a fresh identifier of the form 'genid<N>' and advance the counter.
+sub newReificationID {
+	my ($class) = @_;
+
+#print STDERR "newReificationID($class): ",((caller)[2]),"\n";
+
+	# post-increment: the identifier carries the value held before the bump
+	my $current = $class->{iReificationCounter}++;
+
+	return 'genid' . $current;
+	};
+
+1;
+};
+
+__END__
+
+=head1 NAME
+
+RDFStore::Parser::NTriples - This module implements a streaming N-Triples parser 
+
+=head1 SYNOPSIS
+
+	use RDFStore::Parser::NTriples;
+        use RDFStore::NodeFactory;
+        my $p=new RDFStore::Parser::NTriples(
+		ErrorContext => 2,
+                Handlers        => {
+                        Init    => sub { print "INIT\n"; },
+                        Final   => sub { print "FINAL\n"; },
+                        Assert  => sub { print "STATEMENT - @_\n"; }
+                },
+                NodeFactory     => new RDFStore::NodeFactory() );
+
+	$p->parsefile('http://www.gils.net/bsr-gils.nt');
+        $p->parsefile('http://www.gils.net/rdf/bsr-gils.nt');
+        $p->parsefile('/some/where/my.nt');
+        $p->parsefile('file:/some/where/my.nt');
+	$p->parse(*STDIN);
+
+	use RDFStore::Parser::NTriples;
+        use RDFStore::NodeFactory;
+	my $pstore=new RDFStore::Parser::NTriples(
+                ErrorContext 	=> 2,
+		Style           => 'RDFStore::Parser::Styles::RDFStore::Model',
+                NodeFactory     => new RDFStore::NodeFactory(),
+                style_options   =>      {
+                                        persistent      =>      1,
+                                        seevalues       =>      1,
+                                        store_options         =>      { Name => '/tmp/test' }
+                                }
+        );
+	$pstore->parsefile('http://www.gils.net/bsr-gils.nt');
+
+
+=head1 DESCRIPTION
+
+This module implements an N-Triples I<streaming> parser.
+
+=head1 METHODS
+
+=over 4
+
+=item new
+
+This is a class method, the constructor for RDFStore::Parser::NTriples. B<Options> are passed as keyword value
+pairs. Recognized options are:
+
+=over 4
+
+=item * NodeFactory
+
+This option is B<mandatory> to run the RDFStore::Parser::NTriples parser correctly and must contain a reference to an object of type RDFStore::NodeFactory(3). Such a reference is used during the RDF parsing to create resources, literal and statements to be passed to the registered handlers. A sample implementation is RDFStore::NodeFactory that is provided
+with the RDFStore package.
+
+=item * Source
+
+This option can be specified by the user to set a base URI to use for the generation of resource URIs during parsing. If this option is omitted the parser will try to generate a prefix for generated resources using the input filename or URL actually containing the input RDF. In the near future this option may be made obsolete by the W3C XML Base recommendation.
+
+=item * GenidNumber
+
+Seed the genid numbers with the given value
+
+=item * Style
+
+This option provides an easy way to set a given style of parser. There is one sample Style module provided with the RDFStore::Parser::NTriples distribution called RDFStore::Parser::Styles::RDFStore::Model. Such a module uses the RDFStore::Model(3) to implement a simple RDF storage.
+Custom styles can be provided by giving a full package name containing
+at least one '::'. This package should then have subs defined for each
+handler it wishes to have installed. See L<"WRITE YOUR OWN PARSER"> below
+for a discussion on how to build one.
+
+=item * Handlers
+
+When provided, this option should be an anonymous hash containing as
+keys the type of handler and as values a sub reference to handle that
+type of event. All the handlers get passed as their 1st parameter the
+instance of Expat that is parsing the document. Further details on
+handlers can be found in L<"HANDLERS">. Any handler set here
+overrides the corresponding handler set with the Style option.
+
+=item * ErrorContext
+
+This is an XML::Parser option. When this option is defined, errors are reported
+in context. The value should be the number of lines to show on either side
+of the line in which the error occurred.
+
+=back
+
+All the other XML::Parser and XML::Parser::Expat options should work freely with RDFStore::Parser::NTriples see XML::Parser(3) and XML::Parser::Expat(3).
+
+=item  setHandlers(TYPE, HANDLER [, TYPE, HANDLER [...]])
+
+This method registers handlers for various parser events. It overrides any
+previous handlers registered through the Style or Handler options or through
+earlier calls to setHandlers. By providing a false or undefined value as
+the handler, the existing handler can be unset.
+
+This method returns a list of type, handler pairs corresponding to the
+input. The handlers returned are the ones that were in effect prior to
+the call.
+
+See a description of the handler types in L<"HANDLERS">.
+
+=item parse(SOURCE, URIBASE [, OPT => OPT_VALUE [...]])
+
+The SOURCE parameter should either be a string containing the whole RDF
+document, or it should be an open IO::Handle.
+The URIBASE can be specified by the user to set a base URI to use for the generation of resource URIs during parsing. If this option is omitted the parser will try to generate a prefix for generated resources using either the L<Source> option of the constructor, the input filename or URL actually containing the input RDF. In the near future this option may be made obsolete by the W3C XML Base recommendation.
+Constructor options to XML::Parser::Expat given as keyword-value pairs may follow the URIBASE
+parameter. These override, for this call, any options or attributes passed
+through from the RDFStore::Parser::NTriples instance.
+
+A die call is thrown if a parse error occurs. Otherwise it will return 1
+or whatever is returned from the B<Final> handler, if one is installed.
+In other words, what parse may return depends on the style.
+
+e.g. the RDFStore::Parser::NTriples::Style::RDFStore::Model Style module returns an instance of RDFStore::Model
+
+=item parsestring(STRING, URIBASE [, OPT => OPT_VALUE [...]])
+
+This is just an alias for parse for backwards compatibility.
+
+=item parsefile(URL_OR_FILE [, OPT => OPT_VALUE [...]])
+
+Open URL_OR_FILE for reading, then call parse with the open handle. If URL_OR_FILE
+is a full qualified URL this module uses IO::Socket(3) to actually fetch the content.
+The URIBASE L<parse()> parameter is set to URL_OR_FILE.
+
+=item getReificationCounter()
+
+Return the latest genid number generated by the parser
+
+=back
+
+=head1 HANDLERS
+
+The parser is an event based parser. As the parser recognizes N-Triples
+then any handlers registered for that type of an event are called 
+with suitable parameters.
+
+All handlers receive an instance of XML::Parser::Expat as their first
+argument. See L<XML::Parser::Expat/"METHODS"> for a discussion of the
+methods that can be called on this object. Expat is needed to further
+process things like rdf:parseType="Literal" as XML.
+
+=head2 Init             (Expat)
+
+This is called just before the parsing of the document starts.
+
+=head2 Final            (Expat)
+
+This is called just after parsing has finished, but only if no errors
+occurred during the parse. Parse returns what this returns.
+
+=head2 Assert            (Expat, Statement)
+
+This event is generated when a new RDF statement has been generated by the parser. Statement is of type RDFStore::Statement(3) as generated by the RDFStore::NodeFactory(3) passed as argument to the RDFStore::Parser::NTriples constructor.
+
+=head2 Start_XML_Literal            (Expat, Element [, Attr, Val [,...]])
+
+This event is generated when an XML start tag is recognized within an RDF
+property with parseType="Literal". Element is the
+name of the XML element type that is opened with the start tag. The Attr &
+Val pairs are generated for each attribute in the start tag.
+
+This handler should return a string containing either the original XML chunk or one of its transformations, perhaps using XSLT.
+
+=head2 Stop_XML_Literal              (Expat, Element)
+
+This event is generated when an XML end tag is recognized within an RDF
+property with parseType="Literal". Note that an XML empty tag (<foo/>) generates both a Start_XML_Literal and a Stop_XML_Literal event.
+
+=head2 Char_XML_Literal             (Expat, String)
+
+This event is generated when non-markup is recognized within an RDF
+property with parseType="Literal". The non-markup sequence of characters is in
+String. A single non-markup sequence of characters may generate multiple calls
+to this handler. Whatever the encoding of the string in the original
+document, this is given to the handler in UTF-8.
+
+This handler should return the processed text as a string.
+
+=head1 WRITE YOUR OWN PARSER
+
+You can either turn your Perl script itself into a parser by embedding the needed function hooks, or write a
+custom Style module for RDFStore::Parser::NTriples.
+
+=head2 *.pl scripts
+
+	use RDFStore::Parser::NTriples;
+	use RDFStore::NodeFactory;
+	my $p=new RDFStore::Parser::NTriples(
+		Handlers        => {
+			Init    => sub { print "INIT\n"; },
+			Final   => sub { print "FINAL\n"; },
+			Assert  => sub { print "STATEMENT - @_\n"; }
+		},
+		NodeFactory     => new RDFStore::NodeFactory() );
+
+
+or something like:
+
+	use RDFStore::Parser::NTriples;
+        use RDFStore::NodeFactory;
+	my $p=new RDFStore::Parser::NTriples( NodeFactory     => new RDFStore::NodeFactory() );
+	$p->setHandlers(        Init    => sub { print "INIT\n"; },
+                        	Final   => sub { print "FINAL\n"; },
+                        	Assert  => sub { print join(",",@_),"\n"; }     );
+
+=head2 Style modules
+
+A more sophisticated solution is to write a complete Perl5 Style module for RDFStore::Parser::NTriples that
+can be easily reused in your code. E.g. a perl script could use this piece of code:
+
+	use RDFStore::Parser::NTriples;
+	use RDFStore::Parser::NTriples::MyStyle;
+	use RDFStore::NodeFactory;
+
+	my $p=new RDFStore::Parser::NTriples(	Style => 'RDFStore::Parser::NTriples::MyStyle',
+                			NodeFactory     => new RDFStore::NodeFactory() );
+	$p->parsefile('http://www.gils.net/bsr-gils.rdfs');
+
+The Style module itself could be stored in a file such as MyStyle.pm, like this:
+
+	package RDFStore::Parser::NTriples::MyStyle;
+
+	sub Init { print "INIT\n"; };
+	sub Final { print "FINAL\n"; };
+	sub Assert {
+                print "ASSERT: ",
+                                $_[1]->subject()->toString(),
+                                $_[1]->predicate()->toString(),
+                                $_[1]->object()->toString(), "\n";
+	};
+	sub Start_XML_Literal { print "STARTAG: ",$_[1],"\n"; };
+	sub Stop_XML_Literal { print "ENDTAG: ",$_[1],"\n"; };
+	sub Char_XML_Literal { print "UTF8 chrs: ",$_[1],"\n"; };
+
+	1;
+
+=head1 SEE ALSO
+
+ RDFStore::Parser::SiRPAC(3), DBMS(3) and XML::Parser(3) XML::Parser::Expat(3)
+
+ RDFStore::Model(3) RDFStore::NodeFactory(3)
+
+ N-Triples - http://www.w3.org/TR/rdf-testcases/#ntriples
+
+ RDF Model and Syntax Specification - http://www.w3.org/TR/rdf-syntax-grammar/
+
+ RDF Schema Specification 1.0 - http://www.w3.org/TR/rdf-schema/
+
+=head1 AUTHOR
+
+	Alberto Reggiori <areggiori@webweaving.org>



Mime
View raw message