couchdb-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From j..@apache.org
Subject [4/52] [partial] Add contributed documentation from janl's couchdb-docs repository.
Date Mon, 30 Apr 2012 17:33:22 GMT
http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/dbcheck.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/dbcheck.pl b/share/docs/DocKit/bin/dbcheck.pl
new file mode 100755
index 0000000..2cdda0c
--- /dev/null
+++ b/share/docs/DocKit/bin/dbcheck.pl
@@ -0,0 +1,40 @@
+#! /usr/bin/perl -w
+# dbcheck.pl - run a source file through the CouchDocs::DBCheck SAX handler
+use strict;
+use warnings;
+use File::Basename;
+BEGIN {
+    use lib dirname($0);
+}
+use CouchDocs;
+use CouchDocs::DBCheck;
+use Getopt::Long;
+use XML::Parser::PerlSAX;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile) = (@ARGV);
+die "Must specify source file\n" unless (defined($srcfile));
+my $couchdocs = CouchDocs->new($global_opts);
+
+if (exists($global_opts->{checkulink}) && $global_opts->{checkulink} == 1)
+{
+    print STDERR "\nWarning: Checking for external links may take some time. Please be patient\n";
+
+    # Load LWP::UserAgent only when link checking has been requested
+    eval { require LWP::UserAgent; };
+    die "ulink checking requires LWP::UserAgent\n"
+        if ($@);
+}
+
+my $db_handler = CouchDocs::DBCheck->new($global_opts);
+my $src = $couchdocs->slurp_file($srcfile);
+
+XML::Parser::PerlSAX->new->parse(Source => { String => $src},
+                                 Handler => $db_handler);
+# Print each issue the handler recorded in its issuelist
+foreach my $item (@{$db_handler->{issuelist}})
+{
+    print "Error in $item->{parentid}:\n\t$item->{text}";
+}

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/dependparse.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/dependparse.pl b/share/docs/DocKit/bin/dependparse.pl
new file mode 100755
index 0000000..2401996
--- /dev/null
+++ b/share/docs/DocKit/bin/dependparse.pl
@@ -0,0 +1,144 @@
+#!/usr/bin/perl
+# dependparse.pl - write make-style dependency rules for a source file
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile,$dependfile) = (@ARGV);
+
+croak("Must specify source/destination files") unless(defined($srcfile) && defined($dependfile));
+
+my $couchdocs = CouchDocs->new($global_opts);
+# A missing buildtype option is treated as "not a version build"
+my $is_version_build = (($global_opts->{buildtype} || '') eq 'version');
+my $realfile = $srcfile;
+if ($is_version_build)
+{
+    $realfile = sprintf('%s/%s',$global_opts->{buildsrc},$srcfile);
+}
+
+$couchdocs->dependparse($realfile,$srcfile);
+
+my @dependfile = ();
+
+foreach my $basefile (sort keys %{$couchdocs->{dependencies}})
+{
+    next if ($basefile =~ m/^_/);
+
+    $couchdocs->{dependencies}->{$basefile}->{Makefile} = 1;
+
+    my $srccopy = '';
+    if ($is_version_build)
+    {
+        my $origsrc = sprintf('%s/%s',$global_opts->{buildsrc},$basefile);
+
+        if ((stat($origsrc))[1] !=
+            (stat($basefile))[1])
+        {
+            $srccopy = sprintf("\t\$(VERSION_COPY) %s %s\n",
+                               $origsrc,
+                               $basefile);
+            $couchdocs->{dependencies}->{$basefile}->{$origsrc} = 1;
+        }
+    }
+
+    my @dependgraph = sort(dependloop($couchdocs,$basefile));
+
+    push(@dependfile,
+         sprintf("%s: %s\n%s\n",
+                 $basefile,
+                 join(" \\\n\t\t",@dependgraph),
+                 $srccopy,
+                 ),
+        );
+
+    my ($basename) = ($basefile);
+    $basename =~ s{\.xml$}{};
+
+    push(@dependfile,
+         sprintf("%s-ready.xml: %s\n\n",
+                 $basename,
+                 join(" \\\n\t\t",@dependgraph),
+                 ),
+        );
+
+    push(@dependfile,
+         sprintf("%s-ready-online.xml: %s\n\n",
+                 $basename,
+                 join(" \\\n\t\t",@dependgraph),
+                 ),
+        );
+}
+
+# For a version build, give every image a rule that creates images/ and
+# copies the file over from the build source directory.
+
+if ($is_version_build)
+{
+    foreach my $imgfile (sort keys %{$couchdocs->{dependencies}->{_globalimages}})
+    {
+        push(@dependfile,
+             sprintf("%s: \n\tmkdir -p images\n\t\$(VERSION_COPY) %s/%s %s\n\n",
+                     $imgfile,
+                     $global_opts->{buildsrc},
+                     $imgfile,
+                     $imgfile,
+             )
+            );
+    }
+}
+
+push(@dependfile,
+     sprintf("BASE_TARGET_IMAGES = %s\n\n",
+             join(" \\\n\t",sort keys %{$couchdocs->{dependencies}->{_globalimages}}),
+     )
+    );
+
+push(@dependfile,
+     sprintf("XML_SOURCES = %s\n\n",
+             join(" \\\n\t",sort keys %{$couchdocs->{dependencies}->{_xml}}),
+     )
+    );
+
+push(@dependfile,
+     sprintf("XML_BASE_SOURCES = %s\n\n",
+             join(" \\\n\t",sort map { $_ =~ s/\.xml$//; $_ } keys %{$couchdocs->{dependencies}->{_xmlbase}}),
+     )
+    );
+
+
+$couchdocs->write_file($dependfile,join('',@dependfile));
+
+# Return the sorted, unique list of files $basefile depends on (transitively).
+sub dependloop
+{
+    my ($couchdocs,$basefile) = @_;
+    return() unless (defined($couchdocs) && defined($basefile));
+
+    # %seen expands each file once, so circular dependencies cannot loop forever.
+    my %seen = ($basefile => 1);
+    my @pending = ($basefile);
+    my @filelist = ();
+    while (defined(my $current = shift(@pending)))
+    {
+        next unless (exists($couchdocs->{dependencies}->{$current}));
+        foreach my $file (keys %{$couchdocs->{dependencies}->{$current}})
+        {
+            next if ($seen{$file}++);
+            push(@filelist,$file);
+            push(@pending,$file);
+        }
+    }
+    return(sort @filelist);
+}

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/docmapparse.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/docmapparse.pl b/share/docs/DocKit/bin/docmapparse.pl
new file mode 100755
index 0000000..bcf605e
--- /dev/null
+++ b/share/docs/DocKit/bin/docmapparse.pl
@@ -0,0 +1,25 @@
+#!/usr/bin/perl
+# docmapparse.pl - call the CouchDocs docmapparse method on the given source file
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile) = (@ARGV);
+
+croak("Must specify source files") unless(defined($srcfile));
+
+my $couchdocs = CouchDocs->new($global_opts);
+
+$couchdocs->docmapparse($srcfile);
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/finalize-epub.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/finalize-epub.pl b/share/docs/DocKit/bin/finalize-epub.pl
new file mode 100755
index 0000000..0692862
--- /dev/null
+++ b/share/docs/DocKit/bin/finalize-epub.pl
@@ -0,0 +1,43 @@
+#!/usr/bin/perl
+# finalize-epub.pl - Finalizes an EPUB document
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my $couchdocs = CouchDocs->new($global_opts);
+
+# The EPUB directory is the first argument left after option parsing
+
+my $epubdir = shift or die "EPUB files directory must be specified";
+
+my $contentfile = "$epubdir/OEBPS/content.opf";
+
+my $content = $couchdocs->slurp_file($contentfile);
+
+# Forcibly add a better Creator string
+
+$content =~ s{</dc:title>}{</dc:title>\n    <dc:creator xmlns:dc="http://purl.org/dc/elements/1.1/">Apache, Inc.</dc:creator>};
+
+# Fix the image references in the content file
+
+$content =~ s{images/(published|source|generated)/}{images/}g;
+
+# Look for a cover image, and then add it if it exists
+
+if (-f "$epubdir/OEBPS/images/epub-logo.png")
+{
+    $content =~ s{<manifest>}{<manifest>\n      <item id="cover-image" href="images/epub-logo.png" media-type="image/png"/>};
+}
+
+$couchdocs->write_file($contentfile,$content);

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/finalparse.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/finalparse.pl b/share/docs/DocKit/bin/finalparse.pl
new file mode 100755
index 0000000..811f7be
--- /dev/null
+++ b/share/docs/DocKit/bin/finalparse.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+# finalparse.pl - call the CouchDocs finalparse method for a source/destination pair
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile,$destfile) = (@ARGV);
+
+croak("Must specify source/destination files") unless(defined($srcfile) &&
+                                                      defined($destfile));
+
+my $couchdocs = CouchDocs->new($global_opts);
+# Record this script ($0) under the destination file's tool dependencies
+$couchdocs->{tool_dependencies}->{$destfile}->{$0} = 1;
+$couchdocs->finalparse($srcfile,$destfile);
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/metaparse.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/metaparse.pl b/share/docs/DocKit/bin/metaparse.pl
new file mode 100755
index 0000000..7ea8eaa
--- /dev/null
+++ b/share/docs/DocKit/bin/metaparse.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+# metaparse.pl - call the CouchDocs metaparse method for a source/destination pair
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile,$destfile) = (@ARGV);
+
+croak("Must specify source/destination files") unless(defined($srcfile) &&
+                                                      defined($destfile));
+
+my $couchdocs = CouchDocs->new($global_opts);
+# Record this script ($0) under the destination file's tool dependencies
+$couchdocs->{tool_dependencies}->{$destfile}->{$0} = 1;
+$couchdocs->metaparse($srcfile,$destfile);
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/metavalidate.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/metavalidate.pl b/share/docs/DocKit/bin/metavalidate.pl
new file mode 100755
index 0000000..3597f5d
--- /dev/null
+++ b/share/docs/DocKit/bin/metavalidate.pl
@@ -0,0 +1,26 @@
+#!/usr/bin/perl
+# metavalidate.pl - call the CouchDocs metavalidate method on the named meta file
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($metafile) = (@ARGV);
+croak("Must specify XML meta file") unless(defined($metafile));
+# Strip ".xml" only once the argument is known to be defined
+$metafile =~ s/\.xml//g;
+
+my $couchdocs = CouchDocs->new($global_opts);
+
+$couchdocs->metavalidate($metafile);
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/update-image-size.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/update-image-size.pl b/share/docs/DocKit/bin/update-image-size.pl
new file mode 100755
index 0000000..4f928b5
--- /dev/null
+++ b/share/docs/DocKit/bin/update-image-size.pl
@@ -0,0 +1,160 @@
+#! /usr/bin/perl -w
+# vim:set ts=2 sw=2 expandtab:
+
+# update-image-size.pl
+#
+# Parses DocBook XML and finds and sets the content height/width on images
+
+# Martin MC Brown
+# mc.brown@sun.com
+# 2006-06-29
+
+use strict;
+use IO::File;
+use Image::Info qw/image_info/;
+use File::Basename;
+use Getopt::Long;
+use Cwd;
+use DateTime;
+
+my $dt = DateTime->now();
+
+my $opt_fixhtml = undef;
+
+GetOptions(
+           'fixhtml' => \$opt_fixhtml,
+    );
+
+my $dir = getcwd();
+
+my $dqbase = (split(m{/},$dir))[-2];
+
+foreach my $sourcefile (@ARGV)
+{
+    # The result is written to a temporary file, which replaces the source
+    # file at the end of the loop body.
+    my $destfile = "$sourcefile.tmp";
+
+    my $dirname = dirname($sourcefile);
+
+    my $ifh = IO::File->new("$sourcefile") or die "Error: Can't open source file $sourcefile ($!)\n";
+    $ifh->binmode(':utf8');
+    my $text = join('',<$ifh>);
+    $ifh->close();
+
+# Skip the file if it has been marked for no image sizing updates
+
+    if ($text =~ m/<!-- NOIMAGESIZEUPDATE -->/)
+    {
+        next;
+    }
+
+    my @imgs = ($text =~ m{(<imagedata.*?/>)}msg);
+    my @htmlimgs = ($text =~ m{(<img.*?>)}msg);
+
+    foreach my $img (@imgs)
+    {
+        my $newimg = $img;
+        my ($fileref) = ($img =~ m/fileref="(.*?)"/);
+
+        my $image = image_info("$dirname/$fileref");
+        if (!defined($image) or $image->{error})
+        {
+            print STDERR "Couldn't parse image info for $fileref (within $sourcefile)\n";
+        }
+        else
+        {
+            # image_info() only serves to confirm that the image can be read;
+            # the attributes below are always set to fixed values, so the
+            # reported dimensions are not used.
+
+            $newimg =~ s/(contentdepth=".*?")|(contentwidth=".*?")|(width=".*?")|(depth=".*?")|(scalefit=".*?")|(  +)//g;
+
+            if ($newimg !~ m/(scalefit=".*?")/)
+            {
+                $newimg =~ s/<imagedata/<imagedata scalefit="1" /;
+            }
+
+            if ($newimg =~ m/(contentdepth=".*?")/)
+            {
+                $newimg =~ s/contentdepth=".*?"/contentdepth="100%" /;
+            }
+            else
+            {
+                $newimg =~ s/<imagedata/<imagedata contentdepth="100%" /;
+            }
+
+            if ($newimg =~ m/(width=".*?")/)
+            {
+                $newimg =~ s/width=".*?"/width="100%" /;
+            }
+            else
+            {
+                $newimg =~ s/<imagedata/<imagedata width="100%" /;
+            }
+
+            $newimg =~ s/(  +)/ /g;
+
+            my $repl = quotemeta($img);
+            $text =~ s{$repl}{$newimg}msg;
+        }
+    }
+
+    foreach my $img (@htmlimgs)
+    {
+        my $newimg = $img;
+        next if ($img =~ m/logo.png/);
+
+        if ($newimg =~ m/(width=".*?")/)
+        {
+            $newimg =~ s/width=".*?"/width="100%"/;
+        }
+        else
+        {
+            $newimg =~ s/<img/<img width="100%" /;
+        }
+
+        if ($newimg =~ m/(height=".*?")/)
+        {
+            $newimg =~ s/height=".*?"/height="100%" /;
+        }
+        else
+        {
+            $newimg =~ s/<img/<img height="100%" /;
+        }
+
+        $newimg =~ s/(  +)/ /g;
+
+        my $repl = quotemeta($img);
+        $text =~ s{$repl}{$newimg}msg;
+    }
+
+# Replace the __DOCID__ with our document ID
+
+    $text =~ s/__DOCID__/$dqbase/;
+
+# Replace the HTML Meta build date
+
+    my $builddate = sprintf('%04d-%02d-%02dT%02d:%02d:%02d+00:00',
+                            $dt->year,
+                            $dt->month,
+                            $dt->day,
+                            $dt->hour,
+                            $dt->minute,
+                            $dt->second,
+        );
+
+    if (defined($opt_fixhtml) &&
+        $opt_fixhtml == 1)
+    {
+        $text =~ s/__meta_html_builddate__/$builddate/msg;
+    }
+
+    my $ofh = IO::File->new($destfile,'w') or die "Error: Can't open destination file $destfile ($!)\n";
+    $ofh->binmode(':utf8');
+    print $ofh $text;
+    $ofh->close() or die "Error: Can't write destination file $destfile ($!)\n";
+
+    # Rename over the source directly, so it is not deleted before the move succeeds
+    rename($destfile,$sourcefile) or die "Error: Can't rename $destfile to $sourcefile ($!)\n";
+}

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/versionparse.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/versionparse.pl b/share/docs/DocKit/bin/versionparse.pl
new file mode 100755
index 0000000..a45f762
--- /dev/null
+++ b/share/docs/DocKit/bin/versionparse.pl
@@ -0,0 +1,27 @@
+#!/usr/bin/perl
+# versionparse.pl - check that the source and destination files both exist
+use warnings;
+use strict;
+use File::Basename;
+use Carp;
+BEGIN {
+    use lib dirname($0);
+}
+
+use CouchDocs;
+
+use Getopt::Long;
+
+my $global_opts = {};
+GetOptions("opt=s%" => \$global_opts);
+
+my ($srcfile,$outfile) = (@ARGV);
+
+croak "Source file not found\n" unless(defined($srcfile) && (-f $srcfile));
+croak "Destination file not found\n" unless(defined($outfile) && (-f $outfile));
+# Equal inode numbers are taken to mean both names refer to the same file
+if ((stat($srcfile))[1] == (stat($outfile))[1])
+{
+    carp "Files are the same, skipping\n";
+    exit(0);
+}

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/xmlformat-config.conf
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/xmlformat-config.conf b/share/docs/DocKit/bin/xmlformat-config.conf
new file mode 100644
index 0000000..42a4d41
--- /dev/null
+++ b/share/docs/DocKit/bin/xmlformat-config.conf
@@ -0,0 +1,53 @@
+*DEFAULT
+        format          block
+        subindent       2
+        entry-break     2
+        exit-break      2
+        element-break   2
+        normalize       no
+        wrap-length     0
+
+*DOCUMENT
+        format          block
+        subindent       0
+        entry-break     0
+        exit-break      1
+        element-break   1
+        normalize       no
+        wrap-length     0
+
+configclasses description configopt default
+        format          block
+        entry-break     1
+        exit-break      1
+        element-break   1
+        normalize       yes
+        wrap-length     72
+
+default
+        format          block
+        entry-break     0
+        exit-break      0
+        element-break   0
+        normalize       yes
+        wrap-length     72
+
+configclsses
+        element-break   2
+
+configclass configopt 
+        format          block
+        entry-break     2
+        exit-break      2
+        element-break   1
+        normalize       yes
+        wrap-length     72
+
+description valuespec
+        format          block
+        entry-break     1
+        exit-break      1
+        element-break   1
+        normalize       yes
+        wrap-length     72
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/xmlformat-docbook.conf
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/xmlformat-docbook.conf b/share/docs/DocKit/bin/xmlformat-docbook.conf
new file mode 100644
index 0000000..21295ef
--- /dev/null
+++ b/share/docs/DocKit/bin/xmlformat-docbook.conf
@@ -0,0 +1,81 @@
+*DEFAULT
+        format          block
+        subindent       2
+        entry-break     2
+        exit-break      2
+        element-break   2
+        normalize       no
+        wrap-length     0
+
+*DOCUMENT
+        format          block
+        subindent       0
+        entry-break     0
+        exit-break      1
+        element-break   1
+        normalize       no
+        wrap-length     0
+
+para
+        format          block
+        entry-break     1
+        exit-break      1
+        normalize       yes
+        wrap-length     72
+
+remark
+        format          block
+        entry-break     0
+        exit-break      0
+        normalize       no
+        wrap-length     0
+        element-break   0
+
+informaltable table tbody thead tfoot tgroup colspec row entrytbl tr caption entry
+    format          block
+    normalize       no
+    entry-break     1
+    element-break   1
+    exit-break      1
+    subindent       2
+
+listitem varlistentry note caution tip warning important
+    format          block
+    entry-break     1
+    exit-break      1
+
+book bookinfo chapter appendix part section \
+abstract article articleinfo sect1 sect2 sect3 sect4 formalpara
+    format          block
+
+emphasis literal command option replaceable \
+xref link ulink filename quote guilabel \
+guimenu guibutton guimenuitem methodname \
+subscript superscript filename firstterm
+        format          inline
+
+bridgehead
+    format          block
+    entry-break     1
+    element-break   1
+    exit-break      1
+    normalize       yes
+
+title
+        format          block
+        entry-break     0
+        exit-break      0
+        normalize       yes
+        wrap-length     72
+
+informaltable table tbody thead tfoot tgroup colspec row entrytbl tr caption
+    format          block
+    normalize       no
+    entry-break     1
+    element-break   1
+    exit-break      1
+    subindent       2
+
+programlisting
+    format          verbatim
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/xmlformat-json.conf
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/xmlformat-json.conf b/share/docs/DocKit/bin/xmlformat-json.conf
new file mode 100644
index 0000000..e16bc16
--- /dev/null
+++ b/share/docs/DocKit/bin/xmlformat-json.conf
@@ -0,0 +1,26 @@
+*DEFAULT
+        format          block
+        subindent       2
+        entry-break     2
+        exit-break      2
+        element-break   2
+        normalize       no
+        wrap-length     0
+
+*DOCUMENT
+        format          block
+        subindent       0
+        entry-break     0
+        exit-break      1
+        element-break   1
+        normalize       no
+        wrap-length     0
+
+description fielddesc
+        format          block
+        entry-break     1
+        exit-break      1
+        element-break   0
+        normalize       yes
+        wrap-length     72
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/xmlformat-urlapi.conf
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/xmlformat-urlapi.conf b/share/docs/DocKit/bin/xmlformat-urlapi.conf
new file mode 100644
index 0000000..4056931
--- /dev/null
+++ b/share/docs/DocKit/bin/xmlformat-urlapi.conf
@@ -0,0 +1,26 @@
+*DEFAULT
+        format          block
+        subindent       2
+        entry-break     2
+        exit-break      2
+        element-break   2
+        normalize       no
+        wrap-length     0
+
+*DOCUMENT
+        format          block
+        subindent       0
+        entry-break     0
+        exit-break      1
+        element-break   1
+        normalize       no
+        wrap-length     0
+
+description request response argdesc returncode
+        format          block
+        entry-break     1
+        exit-break      1
+        element-break   0
+        normalize       yes
+        wrap-length     72
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/bin/xmlformat.pl
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/bin/xmlformat.pl b/share/docs/DocKit/bin/xmlformat.pl
new file mode 100755
index 0000000..877f5f1
--- /dev/null
+++ b/share/docs/DocKit/bin/xmlformat.pl
@@ -0,0 +1,1745 @@
+#! /usr/bin/perl -w 
+# vim:set ts=2 sw=2 expandtab:
+
+# xmlformat - configurable XML file formatter/pretty-printer
+
+# Copyright (c) 2004, 2005 Kitebird, LLC.  All rights reserved.
+# Some portions are based on the REX shallow XML parser, which
+# is Copyright (c) 1998, Robert D. Cameron. These include the
+# regular expression parsing variables and the shallow_parse()
+# method.
+# This software is licensed as described in the file LICENSE,
+# which you should have received as part of this distribution.
+
+# Syntax: xmlformat [config-file] xml-file
+
+# Default config file is $ENV{XMLFORMAT_CONF} or ./xmlformat.conf, in that
+# order.
+
+# Paul DuBois
+# paul@kitebird.com
+# 2003-12-14
+
+# The input document first is parsed into a list of strings.  Each string
+# represents one of the following:
+# - text node
+# - processing instruction (the XML declaration is treated as a PI)
+# - comment
+# - CDATA section
+# - DOCTYPE declaration
+# - element tag (either <abc>, </abc>, or <abc/>), *including attributes*
+
+# Entities are left untouched. They appear in their original form as part
+# of the text node in which they occur.
+
+# The list of strings then is converted to a hierarchical structure.
+# The document top level is represented by a reference to a list.
+# Each list element is a reference to a node -- a hash that has "type"
+# and "content" key/value pairs. The "type" key indicates the node
+# type and has one of the following values:
+
+# "text"    - text node
+# "pi"      - processing instruction node
+# "comment" - comment node
+# "CDATA"   - CDATA section node
+# "DOCTYPE" - DOCTYPE node
+# "elt"     - element node
+
+# (For purposes of this program, it's really only necessary to have "text",
+# "elt", and "other".  The types other than "text" and "elt" currently are
+# all treated the same way.)
+
+# For all but element nodes, the "content" value is the text of the node.
+
+# For element nodes, the "content" hash is a reference to a list of
+# nodes for the element's children. In addition, an element node has
+# three additional key/value pairs:
+# - The "name" value is the tag name within the opening tag, minus angle
+#   brackets or attributes.
+# - The "open_tag" value is the full opening tag, which may also be the
+#   closing tag.
+# - The "close_tag" value depends on the opening tag.  If the open tag is
+#   "<abc>", the close tag is "</abc>". If the open tag is "<abc/>", the
+#   close tag is the empty string.
+
+# If the tree structure is converted back into a string with
+# tree_stringify(), the result can be compared to the input file
+# as a regression test. The string should be identical to the original
+# input document.
+
+use strict;
+
+use Getopt::Long;
+$Getopt::Long::ignorecase = 0; # options are case sensitive
+$Getopt::Long::bundling = 1;   # allow short options to be bundled
+
+my $PROG_NAME = "xmlformat";
+my $PROG_VERSION = "1.04";
+my $PROG_LANG = "Perl";
+
+# ----------------------------------------------------------------------
+
+package XMLFormat;
+
+use strict;
+
+# ----------------------------------------------------------------------
+
+# Regular expressions for parsing document components. Based on REX.
+
+# SPE = shallow parsing expression
+# SE = scanning expression
+# CE = completion expression
+# RSB = right square brackets
+# QM = question mark
+
+my $TextSE = "[^<]+";   # text: one or more characters other than "<"
+my $UntilHyphen = "[^-]*-";   # any non-hyphens followed by one hyphen
+my $Until2Hyphens = "$UntilHyphen(?:[^-]$UntilHyphen)*-";   # through the first "--"
+my $CommentCE = "$Until2Hyphens>?";   # through "--", then an optional ">"
+my $UntilRSBs = "[^\\]]*\\](?:[^\\]]+\\])*\\]+";   # through the first run of two or more "]"
+my $CDATA_CE = "$UntilRSBs(?:[^\\]>]$UntilRSBs)*>";   # through the first "]]>"
+my $S = "[ \\n\\t\\r]+";   # one or more whitespace characters
+my $NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]";   # first character of a name
+my $NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]";   # any later character of a name
+my $Name = "(?:$NameStrt)(?:$NameChar)*";   # a complete name
+my $QuoteSE = "\"[^\"]*\"|'[^']*'";   # a double- or single-quoted string
+my $DT_IdentSE = "$S$Name(?:$S(?:$Name|$QuoteSE))*";
+my $MarkupDeclCE = "(?:[^\\]\"'><]+|$QuoteSE)*>";
+my $S1 = "[\\n\\r\\t ]";   # exactly one whitespace character
+my $UntilQMs = "[^?]*\\?+";   # any non-"?" characters followed by one or more "?"
+my $PI_Tail = "\\?>|$S1$UntilQMs(?:[^>?]$UntilQMs)*>";
+my $DT_ItemSE =
+"<(?:!(?:--$Until2Hyphens>|[^-]$MarkupDeclCE)|\\?$Name(?:$PI_Tail))|%$Name;|$S";
+my $DocTypeCE = "$DT_IdentSE(?:$S)?(?:\\[(?:$DT_ItemSE)*\\](?:$S)?)?>?";
+my $DeclCE =
+"--(?:$CommentCE)?|\\[CDATA\\[(?:$CDATA_CE)?|DOCTYPE(?:$DocTypeCE)?";
+my $PI_CE = "$Name(?:$PI_Tail)?";
+my $EndTagCE = "$Name(?:$S)?>?";   # name, optional whitespace, optional ">"
+my $AttValSE = "\"[^<\"]*\"|'[^<']*'";   # quoted attribute value containing no "<"
+my $ElemTagCE = "$Name(?:$S$Name(?:$S)?=(?:$S)?(?:$AttValSE))*(?:$S)?/?>?";
+my $MarkupSPE =
+"<(?:!(?:$DeclCE)?|\\?(?:$PI_CE)?|/(?:$EndTagCE)?|(?:$ElemTagCE)?)";
+my $XML_SPE = "$TextSE|$MarkupSPE";   # one token: either text or markup
+
+# ----------------------------------------------------------------------
+
+# Allowable options and their possible values:
+# - The keys of this hash are the allowable option names
+# - The value for each key is list of allowable option values
+# - If the value is undef, the option value must be numeric
+# If any new formatting option is added to this program, it
+# must be specified here, *and* a default value for it should
+# be listed in the *DOCUMENT and *DEFAULT pseudo-element
+# option hashes.
+
+my %opt_list = (
+  "format"    => [ "block", "inline", "verbatim" ],
+  "normalize"   => [ "yes", "no" ],
+  "subindent"   => undef,
+  "wrap-length" => undef,
+  "entry-break" => undef,
+  "exit-break"  => undef,
+  "element-break" => undef
+);
+
+# Object creation: set up the default formatting configuration
+# and variables for maintaining input and output document.
+
+sub new
+{
+my $class = shift;
+
+  # Built-in formatting options for the two pseudo-elements. The
+  # configuration file may override them, but they are also defined here
+  # so that each option has a value even if the file never mentions it.
+  # Both structures must supply a value for every option.
+
+  my %builtin = (
+
+    # Options for top-level document children.
+    # - entry-break must stay 0, so that no extra newlines precede the
+    #   first element of the output.
+    # - exit-break must stay 1, so that the output document ends with
+    #   a newline.
+    # - The other values are probably best left alone, except perhaps
+    #   element-break if more spacing is wanted.
+
+    "*DOCUMENT" => {
+      "format"        => "block",
+      "normalize"     => "no",
+      "subindent"     => 0,
+      "wrap-length"   => 0,
+      "entry-break"   => 0, # do not change
+      "exit-break"    => 1, # do not change
+      "element-break" => 1
+    },
+
+    # Options used for any element in the document that the
+    # configuration file does not specify explicitly.
+
+    "*DEFAULT" => {
+      "format"        => "block",
+      "normalize"     => "no",
+      "subindent"     => 1,
+      "wrap-length"   => 0,
+      "entry-break"   => 1,
+      "exit-break"    => 1,
+      "element-break" => 1
+    }
+  );
+
+  # Per-element formatting options, seeded with the built-in defaults.
+
+  my $self = { elt_opts => \%builtin };
+
+  my $err_count = 0;
+
+  # Pass every built-in value through the option checker to verify that
+  # it is legal, keeping the value the checker returns.
+
+  foreach my $elt_name (keys (%builtin))
+  {
+    my $opts = $builtin{$elt_name};
+
+    foreach my $opt_name (keys (%{$opts}))
+    {
+      my ($opt_val, $err_msg) = check_option ($opt_name, $opts->{$opt_name});
+
+      if (defined ($err_msg))
+      {
+        warn "LOGIC ERROR: $elt_name default option is invalid\n";
+        warn "$err_msg\n";
+        ++$err_count;
+        next;
+      }
+
+      $opts->{$opt_name} = $opt_val;
+    }
+  }
+
+  # Every option listed in %opt_list must be present in both the
+  # *DOCUMENT and *DEFAULT structures.
+
+  foreach my $opt_name (keys (%opt_list))
+  {
+    foreach my $elt_name (keys (%builtin))
+    {
+      next if exists ($builtin{$elt_name}->{$opt_name});
+
+      warn "LOGIC ERROR: $elt_name has no default '$opt_name' option\n";
+      ++$err_count;
+    }
+  }
+
+  die "Cannot continue; internal default formatting options must be fixed\n"
+    if $err_count > 0;
+
+  # Bless the object into the requested class and return it.
+  return bless ($self, $class);
+}
+
+# Initialize the variables that are used per-document
+
+sub init_doc_vars
+{
+my ($self) = @_;
+
+  # Reset every variable that is kept per document.
+
+  # Elements that the document uses although the configuration file
+  # does not name them explicitly.
+
+  $self->{unconf_elts} = { };
+
+  # Variables that all start out as an empty list:
+  # tokens   = tokens of the current document
+  # line_num = line number of each token
+  # tree     = document node tree (constructed from the token list)
+  # pending  = pending tokens being held until flushed
+  # block_name_stack, block_opts_stack = see below
+
+  foreach my $list_name ("tokens", "line_num", "tree", "pending",
+                         "block_name_stack", "block_opts_stack")
+  {
+    $self->{$list_name} = [ ];
+  }
+
+  # out_doc = resulting output document (constructed from document tree)
+
+  $self->{out_doc} = "";
+
+  # Inline elements inside a block element are processed with the text
+  # normalization (and possible line-wrapping) values of the enclosing
+  # block. Blocks and inlines may nest, so block_opts_stack is the stack
+  # from which the normalize/wrap-length values of the current block
+  # are determined. block_name_stack holds the matching names and is
+  # for debugging.
+
+  # A similar stack maintains each block's current break type. It is
+  # assigned last, so its (empty) list reference is also the value
+  # that this method returns.
+
+  return $self->{block_break_type_stack} = [ ];
+}
+
+# Accessors for token list and resulting output document
+
+sub tokens
+{
+my ($self) = @_;
+
+  # Hand back the reference to the token list itself (not a copy).
+  my $token_list = $self->{tokens};
+  return $token_list;
+}
+
+sub out_doc
+{
+my ($self) = @_;
+
+  # The output document collected so far, as a single string.
+  my $text = $self->{out_doc};
+  return $text;
+}
+
+
+# Methods for adding strings to output document or
+# to the pending output array
+
+sub add_to_doc
+{
+my $self = shift;
+my $str = shift;
+
+  # Append the string directly to the flushed output document.
+  $self->{out_doc} .= $str;
+}
+
+sub add_to_pending
+{
+my $self = shift;
+my $str = shift;
+
+  # Queue the string at the end of the pending array; it stays there
+  # until the pending output is flushed.
+  my $pending = $self->{pending};
+  push (@{$pending}, $str);
+}
+
+
+# Block stack maintenance methods
+
+# Push options onto or pop options off from the stack.  When doing
+# this, also push or pop an element onto the break-level stack.
+
+sub begin_block
+{
+my $self = shift;
+my ($name, $opts) = @_;
+
+  # The three stacks are kept in step: one entry per open block.
+  # A newly opened block always starts with the entry-break type.
+  push @{$self->{block_name_stack}}, $name;
+  push @{$self->{block_opts_stack}}, $opts;
+  push @{$self->{block_break_type_stack}}, "entry-break";
+}
+
+sub end_block
+{
+my ($self) = @_;
+
+  # Undo the matching begin_block: drop the top entry of each stack.
+  pop @{$self->{block_name_stack}};
+  pop @{$self->{block_opts_stack}};
+  pop @{$self->{block_break_type_stack}};
+}
+
+# Return the current block's normalization status or wrap length
+
+sub block_normalize
+{
+my ($self) = @_;
+
+  # Options of the innermost open block are on top of the stack.
+  my $top_opts = $self->{block_opts_stack}->[-1];
+  return $top_opts->{normalize} eq "yes";
+}
+
+sub block_wrap_length
+{
+my ($self) = @_;
+
+  # Options of the innermost open block are on top of the stack.
+  my $top_opts = $self->{block_opts_stack}->[-1];
+  return $top_opts->{"wrap-length"};
+}
+
+# Set the current block's break type, or return the number of newlines
+# for the block's break type
+
+sub set_block_break_type
+{
+my $self = shift;
+my $type = shift;
+
+  # Overwrite the break type of the innermost open block (top of stack).
+  $self->{block_break_type_stack}->[-1] = $type;
+}
+
+sub block_break_value
+{
+my ($self) = @_;
+
+  # The break type on top of the break-type stack ("entry-break",
+  # "element-break" or "exit-break") doubles as the name of the
+  # option to look up in the innermost block's options.
+  my $top_opts = $self->{block_opts_stack}->[-1];
+  my $break_type = $self->{block_break_type_stack}->[-1];
+  return $top_opts->{$break_type};
+}
+
+
+# ----------------------------------------------------------------------
+
+# Read configuration information.  For each element, construct a hash
+# containing a hash key and value for each option name and value.
+# After reading the file, fill in missing option values for
+# incomplete option structures using the *DEFAULT options.
+
+sub read_config
+{
+my $self = shift;
+my $conf_file = shift;
+my @elt_names = ();
+my $err_msg;
+my $in_continuation = 0;
+my $saved_line = "";
+
+  # Arguments: $conf_file is the pathname of the configuration file.
+  # Dies (with file name and line number where available) if the file
+  # cannot be opened, if an option appears before any element names,
+  # if an option line is malformed, or if check_option rejects it.
+
+  # Use three-argument open with a lexical handle so that the file
+  # name is always treated as a plain path (never as a mode or pipe
+  # specification) and no global FH handle is clobbered.
+  open (my $fh, "<", $conf_file)
+    or die "Cannot read config file $conf_file: $!\n";
+  while (<$fh>)
+  {
+    chomp;
+
+    next if /^\s*($|#)/;  # skip blank lines, comments
+    if ($in_continuation)
+    {
+      # Glue this line onto the element-name line saved earlier
+      $_ = $saved_line . " " . $_;
+      $saved_line = "";
+      $in_continuation = 0;
+    }
+    if (!/^\s/)
+    {
+      # Line doesn't begin with whitespace, so it lists element names.
+      # Names are separated by whitespace or commas, possibly followed
+      # by a continuation character or a comment.
+      if (/\\$/)
+      {
+        s/\\$//;                          # remove continuation character
+        $saved_line = $_;
+        $in_continuation = 1;
+        next;
+      }
+      s/\s*#.*$//;                        # remove any trailing comment
+      @elt_names = split (/[\s,]+/, $_);
+      # make sure each name has an entry in the elt_opts structure
+      foreach my $elt_name (@elt_names)
+      {
+        $self->{elt_opts}->{$elt_name} = { }
+          unless exists ($self->{elt_opts}->{$elt_name});
+      }
+    }
+    else
+    {
+      # Line begins with whitespace, so it contains an option
+      # to apply to the current element list, possibly followed by
+      # a comment.  First check that there is a current list.
+      # Then parse the option name/value.
+
+      die "$conf_file:$.: Option setting found before any "
+          . "elements were named.\n"
+        if !@elt_names;
+      s/\s*#.*$//;
+      # Name and value are separated by whitespace or by "="
+      my ($opt_name, $opt_val) = /^\s+(\S+)(?:\s+|\s*=\s*)(\S+)$/;
+      die "$conf_file:$.: Malformed line: $_\n" unless defined ($opt_val);
+
+      # Check option. If illegal, die with message. Otherwise,
+      # add option to each element in current element list
+
+      ($opt_val, $err_msg) = check_option ($opt_name, $opt_val);
+      die "$conf_file:$.: $err_msg\n" if defined ($err_msg);
+      foreach my $elt_name (@elt_names)
+      {
+        $self->{elt_opts}->{$elt_name}->{$opt_name} = $opt_val;
+      }
+    }
+  }
+  close ($fh);
+
+  # For any element that has missing option values, fill in the values
+  # using the options for the *DEFAULT pseudo-element.  This speeds up
+  # element option lookups later.  It also makes it unnecessary to test
+  # each option to see if it's defined: All element option structures
+  # will have every option defined.
+
+  my $def_opts = $self->{elt_opts}->{"*DEFAULT"};
+
+  foreach my $elt_name (keys (%{$self->{elt_opts}}))
+  {
+    next if $elt_name eq "*DEFAULT";
+    foreach my $opt_name (keys (%{$def_opts}))
+    {
+      next if exists ($self->{elt_opts}->{$elt_name}->{$opt_name}); # already set
+      $self->{elt_opts}->{$elt_name}->{$opt_name} = $def_opts->{$opt_name};
+    }
+  }
+}
+
+
+# Check option name to make sure it's legal. Check the value to make sure
+# that it's legal for the name.  Return a two-element array:
+# (value, undef) if the option name and value are legal.
+# (undef, message) if an error was found; message contains error message.
+# For legal values, the returned value should be assigned to the option,
+# because it may get type-converted here.
+
+sub check_option
+{
+my ($opt_name, $opt_val) = @_;
+
+  # Arguments: option name and proposed value.
+  # Returns (value, undef) if both are legal, (undef, message) if not.
+
+  # - Check option name to make sure it's a legal option
+  # - Then check the value.  If there is a list of values
+  #   the value must be one of them.  Otherwise, the value
+  #   must be an integer.
+
+  return (undef, "Unknown option name: $opt_name")
+    unless exists ($opt_list{$opt_name});
+  my $allowable_val = $opt_list{$opt_name};
+  if (defined ($allowable_val))
+  {
+    # Compare as plain strings.  The value comes from the config file,
+    # so it must not be interpolated into a pattern: metacharacters
+    # would let values such as "b.ock" or "yes|no" slip through, and
+    # an unbalanced "(" would abort with a regex compilation error.
+    return (undef, "Unknown '$opt_name' value: $opt_val")
+      unless grep { $_ eq $opt_val } @{$allowable_val};
+  }
+  else  # other options should be numeric
+  {
+    # "$opt_val" converts $opt_val to string for pattern match
+    return (undef, "'$opt_name' value ($opt_val) should be an integer")
+      unless "$opt_val" =~ /^\d+$/;
+  }
+  return ($opt_val, undef);
+}
+
+
+# Return hash of option values for a given element.  If no options are found:
+# - Add the element name to the list of unconfigured elements.
+# - Assign the default options to the element.  (This way the test for the
+#   option fails only once.)
+
+sub get_opts
+{
+my ($self, $elt_name) = @_;
+
+  # Fast path: the element already has an option structure.
+  my $found = $self->{elt_opts}->{$elt_name};
+  return $found if defined ($found);
+
+  # First sighting of an unconfigured element: remember its name and
+  # point it at the *DEFAULT option structure (the same hash, not a
+  # copy) so that later lookups take the fast path.
+  $self->{unconf_elts}->{$elt_name} = 1;
+  $self->{elt_opts}->{$elt_name} = $self->{elt_opts}->{"*DEFAULT"};
+  return $self->{elt_opts}->{$elt_name};
+}
+
+
+# Display contents of configuration options to be used to process document.
+# For each element named in the elt_opts structure, display its format
+# type, and those options that apply to the type.
+
+sub display_config
+{
+my ($self) = @_;
+# For each format type, the additional options that are displayed,
+# in display order.  Inline and verbatim elements have none.
+my %opts_for_format = (
+  "block"    => [ qw(entry-break element-break exit-break
+                     subindent normalize wrap-length) ],
+  "inline"   => [ ],
+  "verbatim" => [ ]
+);
+
+  foreach my $elt_name (sort (keys (%{$self->{elt_opts}})))
+  {
+    my $opts = $self->{elt_opts}->{$elt_name};
+    my $format = $opts->{format};
+
+    # Element name, then its format type, then the options that apply
+    # to that format type, then a blank separator line.
+    my @lines = ("$elt_name\n", "  format = $format\n");
+    push (@lines, map { "  $_ = $opts->{$_}\n" } @{$opts_for_format{$format}});
+    foreach my $line (@lines, "\n")
+    {
+      print $line;
+    }
+  }
+}
+
+
+# Display the list of elements that are used in the document but not
+# configured in the configuration file.
+
+# Then re-unconfigure the elements so that they won't be considered
+# as configured for the next document, if there is one.
+
+sub display_unconfigured_elements
+{
+my ($self) = @_;
+
+  my @elts = keys (%{$self->{unconf_elts}});
+  if (!@elts)
+  {
+    print "The document contains no unconfigured elements.\n";
+    return;
+  }
+
+  # Print the sorted names as a single space-separated string that is
+  # line-wrapped via line_wrap (arguments 0, 0, 65).
+  print "The following document elements were assigned no formatting options:\n";
+  my $names = join (" ", sort (@elts));
+  foreach my $line ($self->line_wrap ([ $names ], 0, 0, 65))
+  {
+    print "$line\n";
+  }
+
+  # Re-unconfigure the elements: drop the option entries that get_opts
+  # installed for them so the next document starts from a clean slate.
+  delete (@{$self->{elt_opts}}{@elts});
+  return;
+}
+
+# ----------------------------------------------------------------------
+
+# Main document processing routine.
+# - Argument is a string representing an input document
+# - Return value is the reformatted document, or undef. An undef return
+#   signifies either that an error occurred, or that some option was
+#   given that suppresses document output. In either case, don't write
+#   any output for the document.  Any error messages will already have
+#   been printed when this returns.
+
+sub process_doc
+{
+my ($self, $doc, $verbose, $check_parser, $canonize_only, $show_unconf_elts) = @_;
+
+  # Start from clean per-document state.
+  $self->init_doc_vars ();
+
+  # Lexical parse: split the document into a list of tokens.
+  warn "Parsing document...\n" if $verbose;
+  $self->shallow_parse ($doc);
+
+  # Parser self-check mode: the concatenation of the tokens should be
+  # identical to the original document.  Report the verdict; no
+  # document is produced in this mode.
+  if ($check_parser)
+  {
+    warn "Checking parser...\n" if $verbose;
+    my $rejoined = join ("", @{$self->tokens ()});
+    my $verdict = ($doc eq $rejoined)
+      ? "Parser is okay\n"
+      : "PARSER ERROR: document token concatenation differs from document\n";
+    print $verdict;
+    return undef;
+  }
+
+  # Record the input line number on which each token begins.
+  $self->assign_line_numbers ();
+
+  # Two validation stages, each returning an error count:
+  # - report_errors looks for error tokens returned by the parser
+  # - tokens_to_tree converts the token list to a tree structure
+  # Any errors in a stage stop processing of this document.
+  my @stages = (
+    [ "Checking document for errors...",      "report_errors"  ],
+    [ "Converting document tokens to tree...", "tokens_to_tree" ]
+  );
+  foreach my $stage (@stages)
+  {
+    my ($message, $method) = @{$stage};
+    warn "$message\n" if $verbose;
+    next unless $self->$method () > 0;
+    warn "Cannot continue processing document.\n";
+    return undef;
+  }
+
+  # (An optional integrity check is possible at this point: the result
+  # of tree_stringify should be identical to the original document.
+  # It is not performed.)
+
+  # Canonize tree to remove extraneous whitespace.
+  warn "Canonizing document tree...\n" if $verbose;
+  $self->tree_canonize ();
+
+  # Canonize-only mode: print the canonized document and stop.
+  if ($canonize_only)
+  {
+    print $self->tree_stringify () . "\n";
+    return undef;
+  }
+
+  # Canonizing looks up the formatting options of every element in the
+  # document, which as a side effect builds the list of elements that
+  # have no explicit configuration.  Display it and stop if requested.
+  if ($show_unconf_elts)
+  {
+    $self->display_unconfigured_elements ();
+    return undef;
+  }
+
+  # Format the tree to produce formatted XML as a single string.
+  warn "Formatting document tree...\n" if $verbose;
+  $self->tree_format ();
+
+  # A non-empty result should already end with a newline when the
+  # *DOCUMENT options are set up with exit-break = 1.  If it does not,
+  # add the newline, but say so rather than fixing it silently.
+  my $result = $self->out_doc ();
+  unless ($result eq "" || $result =~ /\n$/)
+  {
+    warn "LOGIC ERROR: trailing newline had to be added\n";
+    $result .= "\n";
+  }
+
+  return $result;
+}
+
+# ----------------------------------------------------------------------
+
+# Parse XML document into array of tokens and store array
+
+sub shallow_parse
+{
+my $self = shift;
+my $xml_document = shift;
+
+  # Every successive match of the $XML_SPE pattern is one token.
+  my @tokens = ($xml_document =~ /$XML_SPE/g);
+  $self->{tokens} = \@tokens;
+}
+
+# ----------------------------------------------------------------------
+
+# Extract a tag name from a tag and return it.
+
+# Dies if the tag cannot be found, because this is supposed to be
+# called only with a legal tag.
+
+sub extract_tag_name
+{
+my ($tag) = @_;
+
+  # The name follows the opening "<" (or "</" for a close tag).
+  if ($tag =~ /^<\/?($Name)/)
+  {
+    return $1;
+  }
+  die "Cannot find tag name in tag: $tag\n";
+}
+
+# ----------------------------------------------------------------------
+
+# Assign an input line number to each token.  The number indicates
+# the line number on which the token begins.
+
+sub assign_line_numbers
+{
+my ($self) = @_;
+my $next_line = 1;
+my @starts = ();
+
+  foreach my $token (@{$self->{tokens}})
+  {
+    # The token begins on the line reached so far; the newlines it
+    # contains move the counter for the tokens that follow.
+    push (@starts, $next_line);
+    $next_line += ($token =~ tr/\n//);  # tr counts without modifying
+  }
+  $self->{line_num} = \@starts;
+}
+
+# ----------------------------------------------------------------------
+
+# Check token list for errors and report any that are found. Error
+# tokens are those that begin with "<" but do not end with ">".
+
+# Returns the error count.
+
+# Does not modify the original token list.
+
+sub report_errors
+{
+my ($self) = @_;
+my $tokens = $self->{tokens};
+
+  # Indexes of the malformed tokens: those that open with "<" but
+  # lack the closing ">".
+  my @bad = grep { $tokens->[$_] =~ /^</ && $tokens->[$_] !~ />$/ }
+            (0 .. $#{$tokens});
+
+  foreach my $idx (@bad)
+  {
+    my $line_num = $self->{line_num}->[$idx];
+    my $position = $idx + 1;   # token numbers are reported 1-based
+    warn "Malformed token at line $line_num, token $position: $tokens->[$idx]\n";
+  }
+
+  my $err_count = scalar (@bad);
+  warn "Number of errors found: $err_count\n" if $err_count > 0;
+  return $err_count;
+}
+
+# ----------------------------------------------------------------------
+
+# Helper routine to print tag stack for tokens_to_tree
+
+sub print_tag_stack
+{
+my $label = shift;
+my @stack = @_;
+
+  # Written with warn, one message per line: the label, then each
+  # tag preceded by its 1-based position.
+  if (!@stack)
+  {
+    warn "  $label: none\n";
+  }
+  else
+  {
+    warn "  $label:\n";
+    my $position = 0;
+    foreach my $tag (@stack)
+    {
+      ++$position;
+      warn "  ", $position, ": ", $tag, "\n";
+    }
+  }
+}
+
+# Convert the list of XML document tokens to a tree representation.
+# The implementation uses a loop and a stack rather than recursion.
+
+# Does not modify the original token list.
+
+# Returns an error count.
+
+sub tokens_to_tree
+{
+my $self = shift;
+
+  # Reads $self->{tokens} and $self->{line_num}; stores the resulting
+  # list of top-level nodes in $self->{tree}.  Returns the number of
+  # errors detected (each one is also reported with warn).
+
+  # @tag_stack and @children_stack are pushed together when an open tag
+  # is seen: the tag itself, and the child list of the enclosing level
+  # that is suspended while the new element's body is collected.
+
+  my @tag_stack = ();     # stack for element tags
+  my @children_stack = ();  # stack for lists of children
+  my $children = [ ];     # current list of children
+  my $err_count = 0;
+
+  for (my $i = 0; $i < @{$self->{tokens}}; $i++)
+  {
+    my $token = $self->{tokens}->[$i];
+    my $line_num = $self->{line_num}->[$i];
+    # Common prefix for any error message about this token
+    my $tok_err = "Error near line $line_num, token " . ($i+1) . " ($token)";
+    if ($token !~ /^</)           # text
+    {
+      push (@{$children}, text_node ($token));
+    }
+    elsif ($token =~ /^<!--/)       # comment
+    {
+      push (@{$children}, comment_node ($token));
+    }
+    elsif ($token =~ /^<\?/)        # processing instruction
+    {
+      push (@{$children}, pi_node ($token));
+    }
+    elsif ($token =~ /^<!DOCTYPE/)      # DOCTYPE
+    {
+      push (@{$children}, doctype_node ($token));
+    }
+    elsif ($token =~ /^<!\[/)       # CDATA
+    {
+      push (@{$children}, cdata_node ($token));
+    }
+    elsif ($token =~ /^<\//)        # element close tag
+    {
+      if (!@tag_stack)
+      {
+        warn "$tok_err: Close tag w/o preceding open tag; malformed document?\n";
+        ++$err_count;
+        next;
+      }
+      if (!@children_stack)
+      {
+        warn "$tok_err: Empty children stack; malformed document?\n";
+        ++$err_count;
+        next;
+      }
+      my $tag = pop (@tag_stack);
+      my $open_tag_name = extract_tag_name ($tag);
+      my $close_tag_name = extract_tag_name ($token);
+      if ($open_tag_name ne $close_tag_name)
+      {
+        # NOTE(review): the open tag has already been popped here but
+        # @children_stack has not, so the two stacks are no longer in
+        # step after a mismatch; later messages for the same document
+        # may be follow-on effects of this one.
+        warn "$tok_err: Tag mismatch; malformed document?\n";
+        warn "  open tag: $tag\n";
+        warn "  close tag: $token\n";
+        print_tag_stack ("enclosing tags", @tag_stack);
+        ++$err_count;
+        next;
+      }
+      # Element is complete: wrap the collected children in an element
+      # node, then resume the enclosing level's child list and add the
+      # new element to it.
+      my $elt = element_node ($tag, $token, $children);
+      $children = pop (@children_stack);
+      push (@{$children}, $elt);
+    }
+    else                  # element open tag
+    {
+      # If we reach here, we're seeing the open tag for an element:
+      # - If the tag is also the close tag (e.g., <abc/>), close the
+      #   element immediately, giving it an empty child list.
+      # - Otherwise, push tag and child list on stacks, begin new child
+      #   list for element body.
+      if ($token =~ /\/>$/)     # tag is of form <abc/>
+      {
+        push (@{$children}, element_node ($token, "", [ ]));
+      }
+      else              # tag is of form <abc>
+      {
+        push (@tag_stack, $token);
+        push (@children_stack, $children);
+        $children = [ ];
+      }
+    }
+  }
+
+  # At this point, the stacks should be empty if the document is
+  # well-formed.
+
+  if (@tag_stack)
+  {
+    warn "Error at EOF: Unclosed tags; malformed document?\n";
+    print_tag_stack ("unclosed tags", @tag_stack);
+    ++$err_count;
+  }
+  if (@children_stack)
+  {
+    warn "Error at EOF: Unprocessed child elements; malformed document?\n";
+# TODO: print out info about them
+    ++$err_count;
+  }
+
+  # The tree is stored even when errors were counted; callers are
+  # expected to check the returned error count.
+  $self->{tree} = $children;
+  return $err_count;
+}
+
+
+# Node-generating helper methods for tokens_to_tree
+
+# Generic node generator
+
+sub node
+{
+my ($type, $content) = @_;
+
+  return { "type" => $type, "content" => $content };
+}
+
+# Generators for specific non-element nodes.  Each takes the token
+# text and wraps it in a node of the corresponding type.
+
+sub text_node
+{
+  return node ("text", $_[0]);
+}
+
+sub comment_node
+{
+  return node ("comment", $_[0]);
+}
+
+sub pi_node
+{
+  return node ("pi", $_[0]);
+}
+
+sub doctype_node
+{
+  return node ("DOCTYPE", $_[0]);
+}
+
+sub cdata_node
+{
+  return node ("CDATA", $_[0]);
+}
+
+# For an element node, create a standard node with the type and content
+# key/value pairs. Then add pairs for the "name", "open_tag", and
+# "close_tag" hash keys.
+
+sub element_node
+{
+my ($open_tag, $close_tag, $children) = @_;
+
+  # Start from a standard node whose content is the child list, then
+  # add the element-specific keys.  The name is the open tag with
+  # angle brackets and attributes stripped.
+  my $elt = node ("elt", $children);
+  @{$elt}{qw(name open_tag close_tag)}
+    = (extract_tag_name ($open_tag), $open_tag, $close_tag);
+  return $elt;
+}
+
+# ----------------------------------------------------------------------
+
+# Convert the given XML document tree (or subtree) to string form by
+# concatenating all of its components.  Argument is a reference
+# to a list of nodes at a given level of the tree.
+
+# Does not modify the node list.
+
+sub tree_stringify
+{
+my $self = shift;
+my $children = shift || $self->{tree}; # use entire tree if no arg;
+
+  # - Elements have a list of child nodes as content: emit the open
+  #   tag, the recursively stringified children, and the close tag.
+  # - All other node types have text content, emitted as is.
+
+  my @pieces = ();
+  foreach my $child (@{$children})
+  {
+    if ($child->{type} ne "elt")
+    {
+      push (@pieces, $child->{content});
+      next;
+    }
+    push (@pieces,
+          $child->{open_tag},
+          $self->tree_stringify ($child->{content}),
+          $child->{close_tag});
+  }
+  return join ("", @pieces);
+}
+
+# ----------------------------------------------------------------------
+
+
+# Put tree in "canonical" form by eliminating extraneous whitespace
+# from element text content.
+
+# $children is a list of child nodes
+
+# This function modifies the node list.
+
+# Canonizing occurs as follows:
+# - Comment, PI, DOCTYPE, and CDATA nodes remain untouched
+# - Verbatim elements and their descendants remain untouched
+# - Within non-normalized block elements:
+#   - Delete all-whitespace text node children
+#   - Leave other text node children untouched
+# - Within normalized block elements:
+#   - Convert runs of whitespace (including line-endings) to single spaces
+#   - Trim leading whitespace of first text node
+#   - Trim trailing whitespace of last text node
+#   - Trim whitespace that is adjacent to a verbatim or non-normalized
+#     sub-element.  (For example, if a <programlisting> is followed by
+#     more text, delete any whitespace at beginning of that text.)
+# - Within inline elements:
+#   - Normalize the same way as the enclosing block element, with the
+#     exception that a space at the beginning or end is not removed.
+#     (Otherwise, <para>three<literal> blind </literal>mice</para>
+#     would become <para>three<literal>blind</literal>mice</para>.)
+
+sub tree_canonize
+{
+my ($self) = @_;
+
+  # Canonize the top-level node list under the *DOCUMENT
+  # pseudo-element and install the list that comes back as the tree.
+  my $canonized = $self->tree_canonize2 ($self->{tree}, "*DOCUMENT");
+  $self->{tree} = $canonized;
+}
+
+
+sub tree_canonize2
+{
+my $self = shift;
+my $children = shift;
+my $par_name = shift;
+
+  # Arguments:
+  # - $children: reference to the list of child nodes to canonize.
+  #   The list is consumed: nodes are shifted off it one at a time,
+  #   so it is empty when this returns.
+  # - $par_name: name of the element that owns the children.
+  # Returns a reference to a new list holding the surviving nodes.
+
+  # Formatting options for parent
+  my $par_opts = $self->get_opts ($par_name);
+
+  # If parent is a block element, remember its formatting options on
+  # the block stack so they can be used to control canonization of
+  # inline child elements.
+
+  $self->begin_block ($par_name, $par_opts) if $par_opts->{format} eq "block";
+
+  # Iterate through list of child nodes to preserve, modify, or
+  # discard whitespace.  Return resulting list of children.
+
+  # Canonize element and text nodes. Leave everything else (comments,
+  # processing instructions, etc.) untouched.
+
+  my @new_children = ();
+
+  while (@{$children})
+  {
+    my $child = shift (@{$children});
+
+    if ($child->{type} eq "elt")
+    {
+      # Leave verbatim elements untouched. For other element nodes,
+      # canonize child list using options appropriate to element.
+
+      if ($self->get_opts ($child->{name})->{format} ne "verbatim")
+      {
+        $child->{content} = $self->tree_canonize2 ($child->{content},
+                            $child->{name});
+      }
+    }
+    elsif ($child->{type} eq "text")
+    {
+      # Delete all-whitespace node or strip whitespace as appropriate.
+
+      # Paranoia check: We should never get here for verbatim elements,
+      # because normalization is irrelevant for them.
+
+      die "LOGIC ERROR: trying to canonize verbatim element $par_name!\n"
+        if $par_opts->{format} eq "verbatim";
+
+      if (!$self->block_normalize ())
+      {
+        # Enclosing block is not normalized:
+        # - Delete child all-whitespace text nodes.
+        # - Leave other text nodes untouched.
+
+        next if $child->{content} =~ /^\s*$/;
+      }
+      else
+      {
+        # Enclosing block is normalized, so normalize this text node:
+        # - Convert runs of whitespace characters (including
+        #   line-endings characters) to single spaces.
+        # - Trim leading whitespace if this node is the first child
+        #   of a block element or it follows a non-normalized node.
+        # - Trim trailing whitespace if this node is the last child
+        #   of a block element or it precedes a non-normalized node.
+
+        # These are undef if there is no prev or next child.  The
+        # previous child is the last node kept so far; the next child
+        # is the first node not yet shifted off the input list.
+        my $prev_child = $new_children[$#new_children];
+        my $next_child = $children->[0];
+
+        # After this substitution every whitespace run is one space,
+        # so trimming a single space below is sufficient.
+        $child->{content} =~ s/\s+/ /g;
+        $child->{content} =~ s/^ //
+          if (!defined ($prev_child) && $par_opts->{format} eq "block")
+            || $self->non_normalized_node ($prev_child);
+        $child->{content} =~ s/ $//
+          if (!defined ($next_child) && $par_opts->{format} eq "block")
+            || $self->non_normalized_node ($next_child);
+
+        # If resulting text is empty, discard the node.
+        next if $child->{content} =~ /^$/;
+      }
+    }
+    # Node survives (possibly modified): keep it
+    push (@new_children, $child);
+  }
+
+  # Pop block stack if parent was a block element
+  $self->end_block () if $par_opts->{format} eq "block";
+
+  return \@new_children;
+}
+
+
+# Helper function for tree_canonize().
+
+# Determine whether a node is non-normalized.  This is used to check
+# the node that is adjacent to a given text node (either previous
+# or following).  The result is:
+# - No if the node is nil
+# - Yes if the node is a verbatim element
+# - If the node is a block element, yes or no according to its
+#   normalize option (yes if normalize is "no")
+# - No if the node is an inline element.  Inlines are normalized
+#   if the parent block is normalized, and this method is not called
+#   except while examining normalized blocks, so their inline children
+#   are also normalized.
+# - Yes if the node is a comment, PI, DOCTYPE, or CDATA section. These
+#   are treated like verbatim elements.
+
+sub non_normalized_node
+{
+my ($self, $node) = @_;
+
+  # No adjacent node at all: nothing non-normalized next to the text.
+  return 0 if !$node;
+
+  my $type = $node->{type};
+
+  if ($type ne "elt")
+  {
+    # Comments, PIs, DOCTYPE and CDATA sections count as non-normalized.
+    return 1 if grep { $type eq $_ } ("comment", "pi", "DOCTYPE", "CDATA");
+
+    # Anything else is a caller error.
+    die "LOGIC ERROR: non_normalized_node: got called for text node.\n"
+      if $type eq "text";
+    die "LOGIC ERROR: non_normalized_node: unhandled node type.\n";
+  }
+
+  # Element: the answer depends on its format type.
+  my $node_opts = $self->get_opts ($node->{name});
+  my $format = $node_opts->{format};
+
+  return 1 if $format eq "verbatim";
+  return $node_opts->{normalize} eq "no" if $format eq "block";
+  return 0 if $format eq "inline";
+  die "LOGIC ERROR: non_normalized_node: unhandled node format.\n";
+}
+
+# ----------------------------------------------------------------------
+
+# Format (pretty-print) the document tree
+
+# Does not modify the node list.
+
+# The class maintains two variables for storing output:
+# - out_doc stores content that has been seen and "flushed".
+# - pending stores an array of strings (content of text nodes and inline
+#   element tags).  These are held until they need to be flushed, at
+#   which point they are concatenated and possibly wrapped/indented.
+#   Flushing occurs when a break needs to be written, which happens
+#   when something other than a text node or inline element is seen.
+
+# If parent name and children are not given, format the entire document.
+# Assume prevailing indent = 0 if not given.
+
+sub tree_format
+{
+my $self = shift;
+my $par_name = shift || "*DOCUMENT";    # format entire document if no arg
+my $children = shift || $self->{tree};  # use entire tree if no arg
+my $indent = shift || 0;
+
+  # Arguments (all optional):
+  # - $par_name: name of the element whose children are being formatted
+  # - $children: reference to that element's list of child nodes
+  # - $indent: prevailing indent, in spaces, for the parent element
+  # Output is accumulated through add_to_doc and add_to_pending; the
+  # return value is not meaningful.
+
+  # Formatting options for parent element
+  my $par_opts = $self->get_opts ($par_name);
+
+  # If parent is a block element:
+  # - Remember its formatting options on the block stack so they can
+  #   be used to control formatting of inline child elements.
+  # - Set initial break type to entry-break.
+  # - Shift prevailing indent right before generating child content.
+
+  if ($par_opts->{format} eq "block")
+  {
+    $self->begin_block ($par_name, $par_opts);
+    $self->set_block_break_type ("entry-break");
+    $indent += $par_opts->{"subindent"};
+  }
+
+  # Variables for keeping track of whether the previous child
+  # was a text node. Used for controlling break behavior in
+  # non-normalized block elements: No line breaks are added around
+  # text in such elements, nor is indenting added.
+
+  my $prev_child_is_text = 0;
+  my $cur_child_is_text = 0;
+
+  foreach my $child (@{$children})
+  {
+    $prev_child_is_text = $cur_child_is_text;
+
+    # Text nodes: just add text to pending output
+
+    if ($child->{type} eq "text")
+    {
+      $cur_child_is_text = 1;
+      $self->add_to_pending ($child->{content});
+      next;
+    }
+
+    $cur_child_is_text = 0;
+
+    # Element nodes: handle depending on format type
+
+    if ($child->{type} eq "elt")
+    {
+      my $child_opts = $self->get_opts ($child->{name});
+
+      # Verbatim elements:
+      # - Print literally without change (use _stringify).
+      # - Do not line-wrap or add any indent.
+
+      if ($child_opts->{format} eq "verbatim")
+      {
+        $self->flush_pending ($indent);
+        $self->emit_break (0)
+          unless $prev_child_is_text && !$self->block_normalize ();
+        $self->set_block_break_type ("element-break");
+        $self->add_to_doc ($child->{open_tag}
+                          . $self->tree_stringify ($child->{content})
+                          . $child->{close_tag});
+        next;
+      }
+
+      # Inline elements:
+      # - Do not break or indent.
+      # - Do not line-wrap content; just add content to pending output
+      #   and let it be wrapped as part of parent's content.
+
+      if ($child_opts->{format} eq "inline")
+      {
+        # The recursive call receives the current indent unchanged;
+        # only block parents add their subindent (see above).
+        $self->add_to_pending ($child->{open_tag});
+        $self->tree_format ($child->{name}, $child->{content}, $indent);
+        $self->add_to_pending ($child->{close_tag});
+        next;
+      }
+
+      # If we get here, node is a block element.
+
+      # - Break and flush any pending output
+      # - Break and indent (no indent if break count is zero)
+      # - Process element itself:
+      #   - Put out opening tag
+      #   - Put out element content
+      #   - Put out any indent needed before closing tag. None needed if:
+      #     - Element's exit-break is 0 (closing tag is not on new line,
+      #       so don't indent it)
+      #     - There is no separate closing tag (it was in <abc/> format)
+      #     - Element has no children (tags will be written as
+      #       <abc></abc>, so don't indent closing tag)
+      #     - Element has children, but the block is not normalized and
+      #       the last child is a text node
+      #   - Put out closing tag
+
+      $self->flush_pending ($indent);
+      $self->emit_break ($indent)
+        unless $prev_child_is_text && !$self->block_normalize ();
+      $self->set_block_break_type ("element-break");
+      $self->add_to_doc ($child->{open_tag});
+      $self->tree_format ($child->{name}, $child->{content}, $indent);
+      $self->add_to_doc (" " x $indent)
+        unless $child_opts->{"exit-break"} <= 0
+        || $child->{close_tag} eq ""
+        || !@{$child->{content}}
+        || (@{$child->{content}}
+              && $child->{content}->[$#{$child->{content}}]->{type} eq "text"
+              && $child_opts->{normalize} eq "no");
+      $self->add_to_doc ($child->{close_tag});
+      next;
+    }
+
+    # Comments, PIs, etc. (everything other than text and elements),
+    # treat similarly to verbatim block:
+    # - Flush any pending output
+    # - Put out a break
+    # - Add node content to collected output
+
+    $self->flush_pending ($indent);
+    $self->emit_break (0)
+      unless $prev_child_is_text && !$self->block_normalize ();
+    $self->set_block_break_type ("element-break");
+    $self->add_to_doc ($child->{content});
+  }
+
+  # Advance once more so that $prev_child_is_text describes the last
+  # child in the list; the exit-break test below depends on it.
+  $prev_child_is_text = $cur_child_is_text;
+
+  # Done processing current element's children now.
+
+  # If current element is a block element:
+  # - If there were any children, flush any pending output and put
+  #   out the exit break.
+  # - Pop the block stack
+
+  if ($par_opts->{format} eq "block")
+  {
+    if (@{$children})
+    {
+      $self->flush_pending ($indent);
+      $self->set_block_break_type ("exit-break");
+      $self->emit_break (0)
+        unless $prev_child_is_text && !$self->block_normalize ();
+    }
+    $self->end_block ();
+  }
+}
+
+
+# Emit a break - the appropriate number of newlines according to the
+# enclosing block's current break type.
+
+# In addition, emit the number of spaces indicated by indent.  (indent
+# > 0 when breaking just before emitting an element tag that should
+# be indented within its parent element.)
+
+# Exception: Emit no indent if break count is zero. That indicates
+# any following output will be written on the same output line, not
+# indented on a new line.
+
+# Initially, when processing a node's child list, the break type is
+# set to entry-break. Each subsequent break is an element-break.
+# (After child list has been processed, an exit-break is produced as well.)
+
+sub emit_break
+{
+my ($self, $indent) = @_;
+
+  # The enclosing block's current break type determines how many
+  # newlines are written at this point.
+  my $newline_count = $self->block_break_value ();
+  $self->add_to_doc ("\n" x $newline_count);
+
+  # Indentation is only meaningful at the start of a fresh line, so it
+  # is written only when at least one newline was just emitted.
+  if ($indent > 0 && $newline_count > 0)
+  {
+    $self->add_to_doc (" " x $indent);
+  }
+}
+
+
+# Flush pending output to output document collected thus far:
+# - Wrap pending contents as necessary, with indent before *each* line.
+# - Add pending text to output document (thus "flushing" it)
+# - Clear pending array.
+
+sub flush_pending
+{
+my ($self, $indent) = @_;
+
+  # Nothing collected means nothing to write (and the break type is
+  # left untouched).
+  my $pending = $self->{pending};
+  return if !@{$pending};
+
+  # How the pending items are written depends on the current block:
+  #
+  # Normalized block:
+  # - A break is emitted first.
+  # - With line wrapping enabled (wrap length > 0), the items are
+  #   wrapped.  The first line is indented only if a break is in
+  #   effect; otherwise the text directly follows the preceding tag
+  #   and gets no indent.  Every later line gets the prevailing indent.
+  # - With line wrapping disabled, the items are joined unwrapped,
+  #   preceded by the indent only if a break is in effect.
+  #
+  # Non-normalized block:
+  # - Text must not be altered, so the items are joined exactly as
+  #   they are, with no break, no indent and no wrapping.
+
+  my $text;
+
+  if ($self->block_normalize ())
+  {
+    $self->emit_break (0);
+    my $max_len = $self->block_wrap_length ();
+    my $newline_count = $self->block_break_value ();
+    if ($max_len > 0)
+    {
+      my $lead_indent = ($newline_count > 0 ? $indent : 0);
+      # Wrapped lines are joined by newlines; none is added at the end
+      $text = join ("\n", $self->line_wrap ($pending,
+                                            $lead_indent,
+                                            $indent,
+                                            $max_len));
+    }
+    else
+    {
+      $text = ($newline_count > 0 ? " " x $indent : "")
+            . join ("", @{$pending});
+    }
+  }
+  else
+  {
+    $text = join ("", @{$pending});
+  }
+
+  # Write the text, empty the pending list, and advance the break
+  # type so that whatever follows is separated by an element-break.
+  $self->add_to_doc ($text);
+  $self->{pending} = [ ];
+  $self->set_block_break_type ("element-break");
+}
+
+
+# Perform line-wrapping of string array to lines no longer than given
+# length (including indent).
+# Any word longer than line length appears by itself on line.
+# Return array of lines (not newline-terminated).
+
+# $strs - reference to array of text items to be joined and line-wrapped.
+# Each item may be:
+# - A tag (such as <emphasis role="bold">). This should be treated as
+#   an atomic unit, which is important for preserving inline tags intact.
+# - A possibly multi-word string (such as "This is a string"). In this
+#   latter case, line-wrapping preserves internal whitespace in the
+#   string, with the exception that if whitespace would be placed at
+#   the end of a line, it is discarded.
+
+# $first_indent - indent for first line
+# $rest_indent - indent for any remaining lines
+# $max_len - maximum length of output lines (including indent)
+
+sub line_wrap
+{
+my ($self, $strs, $first_indent, $rest_indent, $max_len) = @_;
+
+  # Step 1: split the input items into tokens.  An item beginning
+  # with "<" is a tag and stays in one piece, even if it contains
+  # spaces.  Any other item is cut into alternating runs of
+  # non-whitespace and whitespace.
+
+  my @tokens = ();
+  for my $item (@{$strs})
+  {
+    if ($item =~ /^</)
+    {
+      push (@tokens, $item);
+      next;
+    }
+    push (@tokens, ($item =~ /\S+|\s+/g));
+  }
+
+  # Step 2: glue together neighbouring tokens that have no whitespace
+  # between them, so that "<i>", "word", "</i>" becomes the single
+  # unit "<i>word</i>" and is never split across lines.  A token
+  # starts a new unit when it is the first one, when the previous
+  # unit ends with whitespace, or when the current token begins with
+  # whitespace; otherwise it is appended to the previous unit.
+
+  my @units = ();
+  for my $token (@tokens)
+  {
+    if (!@units || $units[$#units] =~ /\s$/ || $token =~ /^\s/)
+    {
+      push (@units, $token);
+      next;
+    }
+    $units[$#units] .= $token;
+  }
+
+  # Step 3: fill output lines.
+
+  my @lines = ();
+  my $cur_line = "";
+  my $cur_len = 0;
+  # indent for the line about to be started (first line initially)
+  my $margin = $first_indent;
+  # whitespace seen since the last word; written only if the next
+  # word stays on the same line
+  my $gap = "";
+
+  for my $unit (@units)
+  {
+    if ($unit =~ /^\s/)
+    {
+      $gap .= $unit;
+      next;
+    }
+
+    my $unit_len = length ($unit);
+    my $line_is_empty = ($cur_len == 0);
+
+    if ($line_is_empty || $cur_len + length ($gap) + $unit_len > $max_len)
+    {
+      # Start a new output line with this word.  A line always takes
+      # at least one word, however long.  The line in progress is
+      # kept only when the new line is started because the word did
+      # not fit.  Saved whitespace is dropped either way.
+      push (@lines, $cur_line) unless $line_is_empty;
+      $cur_line = " " x $margin . $unit;
+      $cur_len = $margin + $unit_len;
+      $margin = $rest_indent;
+    }
+    else
+    {
+      # Word fits: append it, preceded by the saved whitespace
+      $cur_line .= $gap . $unit;
+      $cur_len += length ($gap) + $unit_len;
+    }
+    $gap = "";
+  }
+
+  # keep the final, partially filled line if there is one
+  push (@lines, $cur_line) if $cur_line ne "";
+
+  return @lines;
+}
+
+1;
+
+# ----------------------------------------------------------------------
+
+# Begin main program
+
+package main;
+
+
+# Help text printed for --help and after a command-line parsing error.
+# Each option is listed as "long form, short form" followed by its
+# description.
+my $usage = <<EOF;
+Usage: $PROG_NAME [options] xml-file
+
+Options:
+--help, -h
+    Print this message and exit.
+--backup suffix, -b suffix
+    Back up the input document, adding suffix to the input
+    filename to create the backup filename.
+--canonized-output
+    Proceed only as far as the document canonization stage,
+    printing the result.
+--check-parser
+    Parse the document into tokens and verify that their
+    concatenation is identical to the original input document.
+    This option suppresses further document processing.
+--config-file file_name, -f file_name
+    Specify the configuration filename. If no file is named,
+    xmlformat uses the file named by the environment variable
+    XMLFORMAT_CONF, if it exists, or ./xmlformat.conf, if it
+    exists. Otherwise, xmlformat uses built-in formatting
+    options.
+--in-place, -i
+    Format the document in place, replacing the contents of
+    the input file with the reformatted document. (It's a
+    good idea to use --backup along with this option.)
+--show-config
+    Show configuration options after reading configuration
+    file. This option suppresses document processing.
+--show-unconfigured-elements
+    Show elements that are used in the document but for
+    which no options were specified in the configuration
+    file. This option suppresses document output.
+--verbose, -v
+    Be verbose about processing stages.
+--version, -V
+    Show version information and exit.
+EOF
+
+# Variables for command line options. All start out undefined; an
+# option that was not given on the command line stays undefined.
+my $help;
+my $backup_suffix;
+my $conf_file;
+my $canonize_only;
+my $check_parser;
+my $in_place;
+my $show_conf;
+my $show_unconf_elts;
+my $show_version;
+my $verbose;
+
+# Parse the command line. On a parsing error, print the usage message
+# and exit with status 1.
+GetOptions (
+  # "=s" means a string argument is required after the option;
+  # options without a suffix are simple flags
+  "help|h"           => \$help,          # print help message
+  "backup|b=s"       => \$backup_suffix, # make backup using suffix
+  "canonized-output" => \$canonize_only, # print canonized document
+  "check-parser"     => \$check_parser,  # verify parser integrity
+  "config-file|f=s"  => \$conf_file,     # config file
+  "in-place|i"       => \$in_place,      # format in place
+  "show-config"      => \$show_conf,     # show configuration file
+  # need better name
+  "show-unconfigured-elements" => \$show_unconf_elts,   # show unconfigured elements
+  "verbose|v"        => \$verbose,       # be verbose
+  "version|V"        => \$show_version,  # show version info
+) or do { print "$usage\n"; exit (1); };
+
+if (defined ($help))
+{
+  print "$usage\n";
+  exit (0);
+}
+
+if (defined ($show_version))
+{
+  print "$PROG_NAME $PROG_VERSION ($PROG_LANG version)\n";
+  exit (0);
+}
+
+# --in-place option requires a named file; without one there is no
+# file to replace, so the option is ignored with a warning.
+
+warn "WARNING: --in-place/-i option ignored (requires named input files)\n"
+  if defined ($in_place) && @ARGV == 0;
+
+# --backup/-b is meaningless without --in-place: a backup is only made
+# when the input file itself is about to be replaced.
+
+die "--backup/-b option meaningless without --in-place/-i option\n"
+  if defined ($backup_suffix) && !defined ($in_place);
+
+# Remember the input filenames as given on the command line
+my @in_file = @ARGV;
+
+my $xf = XMLFormat->new ();
+
+# Select the configuration file, in order of precedence:
+# 1. The file named explicitly on the command line.
+# 2. The file named by the environment variable XMLFORMAT_CONF, if
+#    that variable is set.
+# 3. ./xmlformat.conf, if it is readable and is not a directory.
+
+# A file selected by any of these must be readable and must not be a
+# directory; otherwise the program dies.
+
+# If none applies, no file is read and the built-in default options
+# are used. (These are set up in new().)
+
+my $env_conf_file = $ENV{XMLFORMAT_CONF};
+my $def_conf_file = "./xmlformat.conf";
+
+if (!defined ($conf_file))
+{
+  if (defined ($env_conf_file))
+  {
+    $conf_file = $env_conf_file;
+  }
+  elsif (-r $def_conf_file && ! -d $def_conf_file)
+  {
+    $conf_file = $def_conf_file;
+  }
+}
+
+if (defined ($conf_file))
+{
+  warn "Reading configuration file...\n" if $verbose;
+  -r $conf_file
+    or die "Configuration file '$conf_file' is not readable.\n";
+  -d $conf_file
+    and die "Configuration file '$conf_file' is a directory.\n";
+  $xf->read_config ($conf_file);
+}
+
+# --show-config: display the configuration now in effect, then exit
+# without processing any document.
+if ($show_conf)
+{
+  $xf->display_config ();
+  exit (0);
+}
+
+my ($in_doc, $out_doc);
+
+# Process arguments.
+# - If no files named, read the document from standard input and write
+#   the result to stdout.
+# - If files named, read and process each one. Write output to stdout
+#   unless --in-place option was given.  Make backup of original file
+#   if --backup option was given.
+#
+# process_doc() may return undef; in that case nothing is written for
+# that document.
+#
+# Files are opened with three-argument open and lexical filehandles,
+# so a filename is always taken literally: leading or trailing
+# characters such as ">", "|" or whitespace are never interpreted as
+# an open mode or a command, and "-" names a file called "-".
+
+if (@ARGV == 0)
+{
+  warn "Reading document...\n" if $verbose;
+  {
+    local $/ = undef;
+    $in_doc = <>;            # slurp input document as single string
+  }
+
+  $out_doc = $xf->process_doc ($in_doc,
+              $verbose, $check_parser, $canonize_only, $show_unconf_elts);
+  if (defined ($out_doc))
+  {
+    warn "Writing output document...\n" if $verbose;
+    print $out_doc;
+  }
+}
+else
+{
+  foreach my $file (@ARGV)
+  {
+    warn "Reading document $file...\n" if $verbose;
+    open (my $in_fh, "<", $file)
+      or die "Cannot read $file: $!\n";
+    {
+      local $/ = undef;
+      $in_doc = <$in_fh>;      # slurp input document as single string
+    }
+    close ($in_fh);
+    $out_doc = $xf->process_doc ($in_doc,
+                $verbose, $check_parser, $canonize_only, $show_unconf_elts);
+    next unless defined ($out_doc);
+    if (defined ($in_place))
+    {
+      if (defined ($backup_suffix))
+      {
+        # The original is moved aside before the new content is
+        # written, so it survives if writing fails.
+        warn "Making backup of $file to $file$backup_suffix...\n" if $verbose;
+        rename ($file, $file . $backup_suffix)
+          or die "Could not rename $file to $file$backup_suffix: $!\n";
+      }
+      warn "Writing output document to $file...\n" if $verbose;
+      open (my $out_fh, ">", $file)
+        or die "Cannot write to $file: $!\n";
+      # Buffered write errors (such as a full disk) may only show up
+      # when the handle is closed, so both print and close are checked.
+      print $out_fh $out_doc
+        or die "Cannot write to $file: $!\n";
+      close ($out_fh)
+        or die "Cannot write to $file: $!\n";
+    }
+    else
+    {
+      warn "Writing output document...\n" if $verbose;
+      print $out_doc;
+    }
+  }
+}
+
+warn "Done!\n" if $verbose;
+
+exit (0);

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/catalog.xml
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/catalog.xml b/share/docs/DocKit/catalog.xml
new file mode 100644
index 0000000..2ad761a
--- /dev/null
+++ b/share/docs/DocKit/catalog.xml
@@ -0,0 +1,26 @@
+<?xml version="1.0"?>
+<!DOCTYPE catalog
+  PUBLIC "-//OASIS//DTD Entity Resolution XML Catalog V1.0//EN"
+"http://www.oasis-open.org/committees/entity/release/1.0/catalog.dtd">
+<catalog xmlns="urn:oasis:names:tc:entity:xmlns:xml:catalog">
+
+<public
+  publicId="-//OASIS//DTD DocBook XML V4.5//EN"
+  uri="dtd.d/db4.5/docbookx.dtd"/>
+<system
+  systemId="http://www.oasis-open.org/docbook/xml/4.5/docbookx.dtd"
+  uri="dtd.d/db4.5/docbookx.dtd"/>
+
+<system
+  systemId="couchdocs:urlapi.dtd"
+  uri="dtd.d/urlapi.dtd"/>
+
+<system
+  systemId="couchdocs:config-options.dtd"
+  uri="dtd.d/config-options.dtd"/>
+
+<system
+  systemId="couchdocs:json-objects.dtd"
+  uri="dtd.d/json-objects.dtd"/>
+
+</catalog>

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/dtd.d/config-options.dtd
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/dtd.d/config-options.dtd b/share/docs/DocKit/dtd.d/config-options.dtd
new file mode 100644
index 0000000..239b049
--- /dev/null
+++ b/share/docs/DocKit/dtd.d/config-options.dtd
@@ -0,0 +1,12 @@
+<!ELEMENT configoptions (#PCDATA | configclasses | configopts)*>
+<!ELEMENT configopts (#PCDATA | configopt)*>
+<!ELEMENT default (#PCDATA)>
+<!ELEMENT description (#PCDATA)>
+<!ELEMENT valuespec (#PCDATA | default)*>
+<!ATTLIST valuespec type CDATA #IMPLIED>
+<!ELEMENT configopt (#PCDATA | description | valuespec)*>
+<!ATTLIST configopt id CDATA #IMPLIED>
+<!ATTLIST configopt class CDATA #IMPLIED>
+<!ELEMENT configclass (#PCDATA | description)*>
+<!ATTLIST configclass id CDATA #IMPLIED>
+<!ELEMENT configclasses (#PCDATA | configclass)*>

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/dtd.d/db4.5/ChangeLog
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/dtd.d/db4.5/ChangeLog b/share/docs/DocKit/dtd.d/db4.5/ChangeLog
new file mode 100644
index 0000000..06f59ce
--- /dev/null
+++ b/share/docs/DocKit/dtd.d/db4.5/ChangeLog
@@ -0,0 +1,106 @@
+2006-10-03 13:23  nwalsh
+
+	* trunk/docbook/sgml/catalog.xml, trunk/docbook/sgml/docbook.cat,
+	  trunk/docbook/sgml/docbook.dcl, trunk/docbook/sgml/docbook.dtd,
+	  calstblx.dtd, catalog.xml, dbcentx.mod, dbgenent.mod,
+	  dbhierx.mod, dbnotnx.mod, dbpoolx.mod, docbook.cat,
+	  docbookx.dtd, htmltblx.mod: DocBook V4.5 released
+
+2006-06-02 11:28  nwalsh
+
+	* calstblx.dtd, catalog.xml, dbcentx.mod, dbgenent.mod,
+	  dbhierx.mod, dbnotnx.mod, dbpoolx.mod, docbook.cat,
+	  docbookx.dtd, freshmeat.xsl, htmltblx.mod: Changed copyright
+	  dates and version numbers
+
+2006-05-30 20:58  nwalsh
+
+	* htmltblx.mod: Supply tag omission markers in SGML; suppress
+	  xml:lang in SGML
+
+2006-03-07 13:11  nwalsh
+
+	* trunk/docbook/sgml/catalog.xml, trunk/docbook/sgml/docbook.cat,
+	  trunk/docbook/sgml/docbook.dcl, trunk/docbook/sgml/docbook.dtd,
+	  calstblx.dtd, catalog.xml, dbcentx.mod, dbgenent.mod,
+	  dbhierx.mod, dbnotnx.mod, dbpoolx.mod, docbook.cat,
+	  docbookx.dtd, freshmeat.xsl, htmltblx.mod: Change version
+	  numbers to 4.5CR2
+
+2006-03-07 13:03  nwalsh
+
+	* dbpoolx.mod: Allow citebiblioid anywhere the other citation
+	  elements are allowed
+
+2006-02-16 21:12  nwalsh
+
+	* calstblx.dtd, catalog.xml, dbcentx.mod, dbgenent.mod,
+	  dbhierx.mod, dbnotnx.mod, dbpoolx.mod, docbook.cat,
+	  docbookx.dtd, freshmeat.xsl, htmltblx.mod: DocBook V4.5 released
+
+2005-06-29 10:59  nwalsh
+
+	* trunk/docbook/sgml/docbook.dtd, docbookx.dtd: DocBook V4.5CR1
+	  Released
+
+2005-06-29 10:58  nwalsh
+
+	* trunk/docbook/sgml/catalog.xml, trunk/docbook/sgml/docbook.cat,
+	  trunk/docbook/sgml/docbook.dcl, calstblx.dtd, catalog.xml,
+	  dbcentx.mod, dbgenent.mod, dbhierx.mod, dbnotnx.mod,
+	  dbpoolx.mod, docbook.cat, htmltblx.mod: Updated version number
+
+2005-06-29 10:53  nwalsh
+
+	* freshmeat.xsl: Tweaked freshmeat changes
+
+2005-06-24 21:09  nwalsh
+
+	* calstblx.dtd, dbhierx.mod, dbpoolx.mod, htmltblx.mod,
+	  soextblx.dtd: Added doc: structured comments
+
+2005-05-05 11:41  nwalsh
+
+	* trunk/docbook/sgml/docbook.dtd, docbookx.dtd: DocBook V4.5b1
+	  Released
+
+2005-05-05 11:40  nwalsh
+
+	* trunk/docbook/sgml/catalog.xml, trunk/docbook/sgml/docbook.cat,
+	  trunk/docbook/sgml/docbook.dcl, calstblx.dtd, catalog.xml,
+	  dbcentx.mod, dbgenent.mod, dbhierx.mod, dbnotnx.mod,
+	  dbpoolx.mod, docbook.cat, htmltblx.mod: Updated version number
+
+2005-05-05 11:37  nwalsh
+
+	* freshmeat.xsl: Prepare for 4.5b1
+
+2005-05-05 10:59  nwalsh
+
+	* dbpoolx.mod: RFE 1055480: Make revnumber optional
+
+2005-05-05 10:54  nwalsh
+
+	* dbpoolx.mod, htmltblx.mod: Allow common attributes on HTML table
+	  elements
+
+2005-05-05 10:48  nwalsh
+
+	* dbpoolx.mod: Added termdef
+
+2005-05-05 10:39  nwalsh
+
+	* dbpoolx.mod: Added mathphrase
+
+2005-05-05 10:33  nwalsh
+
+	* dbhierx.mod: RFE 1070458: Allow colophon in article
+
+2005-05-05 10:32  nwalsh
+
+	* dbpoolx.mod: RFE 1070770: Allow procedure in example
+
+2005-05-05 10:21  nwalsh
+
+	* dbpoolx.mod: Add isrn to list of biblioid class attribute values
+

http://git-wip-us.apache.org/repos/asf/couchdb/blob/1a82fd7c/share/docs/DocKit/dtd.d/db4.5/README
----------------------------------------------------------------------
diff --git a/share/docs/DocKit/dtd.d/db4.5/README b/share/docs/DocKit/dtd.d/db4.5/README
new file mode 100644
index 0000000..6fc60c4
--- /dev/null
+++ b/share/docs/DocKit/dtd.d/db4.5/README
@@ -0,0 +1,8 @@
+README for the DocBook XML DTD
+
+For more information about DocBook, please see
+
+  http://www.oasis-open.org/docbook/
+
+Please send all questions, comments, concerns, and bug reports to the
+DocBook mailing list: docbook@lists.oasis-open.org


Mime
View raw message