Author: crossley
Date: Thu Apr 6 23:14:39 2006
New Revision: 392196
URL: http://svn.apache.org/viewcvs?rev=392196&view=rev
Log:
Perl script and config file to find all xml files and run tidy.sf.net
Issue: FOR-644
Added:
forrest/trunk/etc/test-whitespace/test-1.xml (with props)
forrest/trunk/etc/tidy-config.txt (with props)
forrest/trunk/etc/tidy-xml.pl (with props)
Added: forrest/trunk/etc/test-whitespace/test-1.xml
URL: http://svn.apache.org/viewcvs/forrest/trunk/etc/test-whitespace/test-1.xml?rev=392196&view=auto
==============================================================================
--- forrest/trunk/etc/test-whitespace/test-1.xml (added)
+++ forrest/trunk/etc/test-whitespace/test-1.xml Thu Apr 6 23:14:39 2006
@@ -0,0 +1,92 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!--
+ Copyright 2002-2006 The Apache Software Foundation or its licensors,
+ as applicable.
+
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
+"http://forrest.apache.org/dtd/document-v20.dtd">
+<document>
+ <header>
+ <title>Test document for Tidy</title>
+ </header>
+ <body>
+ <section id="para-1">
+ <title>Paragraphs</title>
+ <p>The following paragraph was formatted into separate lines, with each line less than 80
+ characters.</p>
+ <p>Apache Forrest is a publishing framework that transforms input from various sources into a
+ unified presentation in one or more output formats. The modular and extensible plugin
+ architecture is based on Apache Cocoon and relevant standards, which separates presentation
+ from content. Forrest can generate static documents, or be used as a dynamic server, or be
+ deployed by its automated facility.</p>
+ <p>The following paragraph was all on one line.</p>
+ <p>Apache Forrest is a publishing framework that transforms input from various sources into a
+ unified presentation in one or more output formats. The modular and extensible plugin
+ architecture is based on Apache Cocoon and relevant standards, which separates presentation
+ from content. Forrest can generate static documents, or be used as a dynamic server, or be
+ deployed by its automated facility.</p>
+ </section>
+ <section id="para-2">
+ <title>Another section</title>
+ <p>Forrest is ...</p>
+ <ul>
+ <li>
+ <strong>this</strong>and that</li>
+ <li>these and
+ <strong>those</strong></li>
+ </ul>
+ </section>
+ <section id="cdata">
+ <title>CDATA</title>
+ <source>
+ <![CDATA[top line text
+<elements>
+ <idented>
+ A long line that should not get wrapped. A long line that should not get wrapped. A long line that should not get wrapped.
+ </idented>
+</elements>]]>
+</source>
+ </section>
+ <section id="white">
+ <title>Whitespace and Tabs</title>
+ <p>Trailing whitespace after this closing tag:</p>
+ <p>Indentation was using tabs: here and here.</p>
+ <ul>
+ <li>item one</li>
+ </ul>
+ </section>
+ <section id="entities">
+ <title>Entities</title>
+ <p>What happens to entities?</p>
+ <ul>
+ <li>Jörg</li>
+ <li>this &amp; that</li>
+ <li>greater than &gt; great</li>
+ <li>trademark™</li>
+ <li>Jörg</li>
+ <li>me@there.org</li>
+ </ul>
+ </section>
+ <section id="misc">
+ <title>Miscellaneous</title>
+ <p>Elements with carefully formatted attributes. Do they get re-aligned?</p>
+ <p>
+ <a href="http://forrest.apache.org/" title="Apache Forrest" id="forrest-home">link</a>.</p>
+ <p>
+ <a href="http://forrest.apache.org/docs_0_80/howto/howto-custom-html-source.html"
+ title="How to customize processing of html source" id="custom-html-source">link</a>.</p>
+ </section>
+ </body>
+</document>
Propchange: forrest/trunk/etc/test-whitespace/test-1.xml
------------------------------------------------------------------------------
svn:eol-style = native
Added: forrest/trunk/etc/tidy-config.txt
URL: http://svn.apache.org/viewcvs/forrest/trunk/etc/tidy-config.txt?rev=392196&view=auto
==============================================================================
--- forrest/trunk/etc/tidy-config.txt (added)
+++ forrest/trunk/etc/tidy-config.txt Thu Apr 6 23:14:39 2006
@@ -0,0 +1,10 @@
+add-xml-decl: yes
+add-xml-space: yes
+input-xml: yes
+output-xml: yes
+indent: auto
+indent-spaces: 2
+write-back: yes
+quiet: yes
+wrap: 100
+wrap-sections: no
Propchange: forrest/trunk/etc/tidy-config.txt
------------------------------------------------------------------------------
svn:eol-style = native
Added: forrest/trunk/etc/tidy-xml.pl
URL: http://svn.apache.org/viewcvs/forrest/trunk/etc/tidy-xml.pl?rev=392196&view=auto
==============================================================================
--- forrest/trunk/etc/tidy-xml.pl (added)
+++ forrest/trunk/etc/tidy-xml.pl Thu Apr 6 23:14:39 2006
@@ -0,0 +1,133 @@
+#!/usr/bin/perl -w
+
+# Copyright 2006 The Apache Software Foundation or its licensors,
+# as applicable.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+##########################################################
+#
+# Find all xml type files and run 'tidy' on each.
+# Intended to be run occasionally by committers.
+#
+# Developed only for UNIX, YMMV.
+#
+# Procedure:
+# Run the script. It will descend the directory tree.
+# Run with no parameters or -h to show usage.
+#
+##########################################################
+
+use strict;
+use vars qw($opt_h $opt_v);
+use Getopt::Std;
+use File::Basename;
+use File::Find;
+use File::Spec;
+
+#--------------------------------------------------
+# ensure proper usage
+# Parse the -h / -v switches, then validate the two required positional
+# arguments (startDir, configFile).
+#
+# Exit status: 0 when help was explicitly requested with -h, 1 for every
+# usage error, so that calling scripts can tell the two apart.
+if (!getopts("hv")) {
+  # getopts has already warned about the unknown switch.
+  ShowUsage();
+  exit 1;
+}
+if (defined($opt_h)) {
+  ShowUsage();
+  exit 0;
+}
+if (scalar(@ARGV) < 2) {
+  ShowUsage();
+  exit 1;
+}
+my $startDir = shift;
+# The script later does a chdir into this path, so it must be a directory,
+# not merely something that exists.
+if (!-d $startDir) {
+  print STDERR "\nThe start directory '$startDir' does not exist or is not a directory.\n";
+  ShowUsage();
+  exit 1;
+}
+my $configFile = shift;
+# The path is handed to 'tidy -config', so it must be a plain file.
+if (!-f $configFile) {
+  print STDERR "\nThe configuration file '$configFile' does not exist or is not a regular file.\n";
+  ShowUsage();
+  exit 1;
+}
+
+#--------------------------------------------------
+# configuration
+# The tidy invocation is kept as an argument list so that it can be run
+# without a shell (file names with spaces or metacharacters stay intact).
+# The config path is made absolute *before* the chdir below: File::Find
+# changes into every directory it visits, so a relative path would no
+# longer resolve by the time tidy is started.
+my @tidyCommand = ("tidy", "-config", File::Spec->rel2abs($configFile));
+
+# File name extensions (leading dot included) that are treated as XML.
+my @xmlFileTypes = (
+  ".xml", ".xsl", ".xslt", ".xmap", ".xcat",
+  ".xconf", ".xroles", ".roles", ".xsp", ".rss",
+  ".xlog", ".xsamples", ".xtest", ".xweb", ".xwelcome",
+  ".samplesxconf", ".samplesxpipe", ".svg", ".xhtml", ".jdo", ".gt", ".jx", ".jmx",
+  ".jxt", ".meta", ".pagesheet", ".stx", ".xegrm", ".xgrm", ".xlex", ".xmi",
+  ".xsd", ".rng", ".rdf", ".rdfs", ".xul", ".tld", ".xxe", ".ft", ".fv",
+);
+my $countTotal = 0;   # number of files handed to tidy
+
+chdir $startDir or die "Cannot cd to '$startDir': $!\n";
+
+#--------------------------------------------------
+# process_file
+#   "wanted" callback for File::Find. When it is called, $_ holds the bare
+#   entry name, the current directory is the one containing that entry, and
+#   $File::Find::name holds the path relative to the start directory.
+#
+#   Skips .svn, CVS and build directories (without descending into them),
+#   hidden files, non-text files and files whose extension is not listed in
+#   @xmlFileTypes. Every remaining file is counted in $countTotal and passed
+#   to tidy; whatever tidy writes to its standard output is copied to ours.
+sub process_file {
+  my $entry = $_;
+  if (-d $entry) {
+    # Pruning avoids walking (and -T testing) every file below these trees.
+    if ($entry eq ".svn" || $entry eq "CVS" || $entry eq "build") {
+      $File::Find::prune = 1;
+    }
+    return;
+  }
+  return unless -f $entry && -T $entry;       # process only text files
+  return if ($entry =~ /^\./);                 # skip hidden files
+  my $ext = (fileparse($entry, qr/\.[^.]*/))[2];
+  return unless isXmlType($ext);               # process only xml files
+
+  # Path relative to the start directory, used for messages only.
+  (my $displayName = $File::Find::name) =~ s/^\.\///;
+  $countTotal++;
+  if ($opt_v) { print "$displayName : \n"; }
+
+  # Run tidy on the entry in the *current* directory. The "./" prefix keeps
+  # a file name that starts with "-" from being read as a tidy option.
+  my $tidy;
+  unless (open($tidy, "-|", @tidyCommand, "./" . $entry)) {
+    warn "Cannot run tidy on '$displayName': $!\n";
+    return;
+  }
+  while (my $line = <$tidy>) {
+    print $line;
+  }
+  # close() on a pipe waits for the child and sets $?. tidy documents exit
+  # status 1 as "warnings only" and 2 as "errors", so only the latter is
+  # reported here.
+  close $tidy;
+  if (($? >> 8) > 1) {
+    warn "tidy reported errors for '$displayName'\n";
+  }
+  return;
+}
+find(\&process_file, ".");
+
+#--------------------------------------------------
+# report some statistics
+# One blank line, the summary line, then one more blank line.
+print "\nProcessed $countTotal xml-type files.\n" . "\n";
+
+#==================================================
+# isXmlType
+#==================================================
+
+sub isXmlType {
+  # Returns 1 when the given extension (leading dot included) is one of the
+  # entries of @xmlFileTypes, otherwise 0. The comparison is case-sensitive.
+  my ($extension) = @_;
+  my $matches = grep { $_ eq $extension } @xmlFileTypes;
+  return $matches ? 1 : 0;
+}
+
+#==================================================
+# ShowUsage
+#==================================================
+
+sub ShowUsage {
+  # Writes the command-line help text to STDERR. $0 is interpolated, so the
+  # message shows the name the script was invoked with.
+  print STDERR <<"END_OF_USAGE";
+
+Usage: $0 [-h] [-v] startDir configFile > logfile
+
+ where:
+ startDir = The directory (pathname) to start processing. Will descend.
+ configFile = Pathname for configuration file for 'tidy'
+
+ option:
+ h = Show this help message.
+ v = Be verbose.
+
+Note: It will skip directories with name /build/
+
+END_OF_USAGE
+}
Propchange: forrest/trunk/etc/tidy-xml.pl
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: forrest/trunk/etc/tidy-xml.pl
------------------------------------------------------------------------------
svn:executable = *
|