pdfbox-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From ju...@apache.org
Subject svn commit: r910660 [5/5] - in /pdfbox/site/publish: ./ commandlineutilities/ css/ images/ images/logos/ userguide/
Date Tue, 16 Feb 2010 19:37:20 GMT
Added: pdfbox/site/publish/userguide/file_references.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/file_references.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/file_references.html (added)
+++ pdfbox/site/publish/userguide/file_references.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,311 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - PDF File References</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+              <strong>File References</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <div class="section"><h2><a name="PDF_File_Specification"></a>PDF File Specification</h2>
+<p>
+      See package:<a href="../apidocs/org/apache/pdfbox/pdmodel/common/filespecification/package-summary.html">org.apache.pdfbox.pdmodel.common.filespecification</a><br />
+
+      See example:<a href="../apidocs/org/apache/pdfbox/examples/pdmodel/EmbeddedFiles.html">EmbeddedFiles</a></p>
+<p>
+      	A PDF can contain references to external files via the file system or a URL to a remote location.
+      	It is also possible to embed a binary file into a PDF document.
+      </p>
+<p>
+      There are two classes that can be used when referencing a file.
+      <a href="../apidocs/org/apache/pdfbox/examples/pdmodel/common/filespecification/PDSimpleFileSpecification.html">PDSimpleFileSpecification</a>
+      is a simple string reference to a file (e.g. &quot;./movies/BigMovie.avi&quot;).  The simple file specification does not allow for any parameters to be
+      set.  The <a href="../apidocs/org/apache/pdfbox/examples/pdmodel/common/filespecification/PDComplexFileSpecification.html">PDComplexFileSpecification</a>
+      is more feature rich and allows for advanced settings on the file reference.
+      </p>
+<p>
+      It is also possible to embed a file directly into a PDF.  Instead of setting the file attribute of the PDComplexFileSpecification, the
+      EmbeddedFile attribute can be used instead.
+      </p>
+</div>
+<div class="section"><h2><a name="File_Attachments"></a>File Attachments</h2>
+<p>
+      PDF documents can contain file attachments that are accessed from the Document-&gt;File Attachments menu.  PDFBox allows attachments
+      to be added to and extracted from PDF documents.  Attachments are part of the named tree that is attached to the document catalog.
+      </p>
+<div class="source"><pre>
+        PDEmbeddedFilesNameTreeNode efTree = new PDEmbeddedFilesNameTreeNode();
+
+        //first create the file specification, which holds the embedded file
+        PDComplexFileSpecification fs = new PDComplexFileSpecification();
+        fs.setFile( &quot;Test.txt&quot; );
+        InputStream is = ...;
+        PDEmbeddedFile ef = new PDEmbeddedFile(doc, is );
+        //set some of the attributes of the embedded file
+        ef.setSubtype( &quot;text/plain&quot; );
+        ef.setSize( data.length );
+        ef.setCreationDate( new GregorianCalendar() );
+        fs.setEmbeddedFile( ef );
+
+        //now add the entry to the embedded file tree and set in the document.
+        Map efMap = new HashMap();
+        efMap.put( &quot;My first attachment&quot;, fs );
+        efTree.setNames( efMap );
+        //attachments are stored as part of the &quot;names&quot; dictionary in the document catalog
+        PDDocumentNameDictionary names = new PDDocumentNameDictionary( doc.getDocumentCatalog() );
+        names.setEmbeddedFiles( efTree );
+        doc.getDocumentCatalog().setNames( names );
+  </pre>
+</div>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/file_references.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/fonts.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/fonts.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/fonts.html (added)
+++ pdfbox/site/publish/userguide/fonts.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,320 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - PDF Fonts</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+              <strong>Fonts</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <div class="section"><h2><a name="Standard_14_Fonts"></a>Standard 14 Fonts</h2>
+<p>
+  		The PDF specification states that a standard set of 14 fonts will always be available when consuming
+  		PDF documents.  In PDFBox these are defined as constants in the PDType1Font class.
+  		</p>
+<table class="bodyTable"><tr class="a"><th>Standard Font</th>
+</tr>
+<tr class="b"><td>PDType1Font.TIMES_ROMAN</td>
+</tr>
+<tr class="a"><td>PDType1Font.TIMES_BOLD</td>
+</tr>
+<tr class="b"><td>PDType1Font.TIMES_ITALIC</td>
+</tr>
+<tr class="a"><td>PDType1Font.TIMES_BOLD_ITALIC</td>
+</tr>
+<tr class="b"><td>PDType1Font.HELVETICA</td>
+</tr>
+<tr class="a"><td>PDType1Font.HELVETICA_BOLD</td>
+</tr>
+<tr class="b"><td>PDType1Font.HELVETICA_OBLIQUE</td>
+</tr>
+<tr class="a"><td>PDType1Font.HELVETICA_BOLD_OBLIQUE</td>
+</tr>
+<tr class="b"><td>PDType1Font.COURIER</td>
+</tr>
+<tr class="a"><td>PDType1Font.COURIER_BOLD</td>
+</tr>
+<tr class="b"><td>PDType1Font.COURIER_OBLIQUE</td>
+</tr>
+<tr class="a"><td>PDType1Font.COURIER_BOLD_OBLIQUE</td>
+</tr>
+<tr class="b"><td>PDType1Font.SYMBOL</td>
+</tr>
+<tr class="a"><td>PDType1Font.ZAPF_DINGBATS</td>
+</tr>
+</table>
+</div>
+<div class="section"><h2><a name="TrueType_Fonts"></a>TrueType Fonts</h2>
+<div class="section"><h2><a name="Embedding_TrueType_Fonts"></a>Embedding TrueType Fonts</h2>
+<p>
+      	PDFBox supports embedding TrueType fonts.  Loading a new font is easy.
+      	</p>
+<div class="source"><pre>
+      PDDocument doc = PDDocument.load( ... );
+      PDFont font = PDTrueTypeFont.loadTTF( doc, new File( &quot;SpecialFont.ttf&quot; ) );</pre>
+</div>
+</div>
+<div class="section"><h2><a name="External_TrueType_Fonts"></a>External TrueType Fonts</h2>
+<p>
+      	While it is recommended to embed all fonts for greatest portability not all PDF producer applications
+      	will do this.  When displaying a PDF it is necessary to find an external font to use.
+      	PDFBox will look for a mapping file to use when substituting fonts.<br />
+<br />
+
+      	PDFBox will load <i>Resources/PDFBox_External_Fonts.properties</i> off of the classpath to map
+      	font names to TTF font files.  The <i>UNKNOWN_FONT</i> property in that file will tell PDFBox which font
+      	to use when no mapping exists.
+      	</p>
+</div>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/fonts.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/highlighting.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/highlighting.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/highlighting.html (added)
+++ pdfbox/site/publish/userguide/highlighting.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,323 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - PDF Highlighting</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+              <strong>Highlighting</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <meta name="keywords"></meta><div class="section"><h2><a name="Highlighting_text_in_a_PDF"></a>Highlighting text in a PDF</h2>
+<p>
+            There are cases when you might want to highlight text in a PDF document.  For example, if the PDF is the result
+            of a search request you might want to highlight the word in the resulting PDF document.  There are several ways
+            this can be achieved, each method varying in complexity and flexibility.
+        </p>
+<div class="section"><h2><a name="a1._Use_the_search_open_parameter"></a>1. Use the 'search' open parameter</h2>
+<p>
+          Acrobat supports passing in various parameters that tell it what to do once the PDF is open.
+          See <a href="http://partners.adobe.com/public/developer/en/acrobat/PDFOpenParameters.pdf" class="externalLink">PDF Open Parameters</a> for
+          documentation on all the open parameters.  One of the parameters is the 'search' parameter, which will automatically run the search
+          functionality inside of Acrobat once the PDF is open.  For example: <a href="http://pdfbox.apache.org/userguide/text_extraction.pdf#search=&quot;check&quot;" class="externalLink">http://pdfbox.apache.org/userguide/text_extraction.pdf#search=&quot;check&quot;</a><br />
+<br />
+<note>The words must be enclosed in quotes and separated by spaces; for example: #search=&quot;pdfbox rocks&quot;</note>
+          This is a great solution because of its simplicity!  It doesn't require PDFBox at all, but it is a potential solution that
+          many developers are not aware of.
+          </p>
+</div>
+<div class="section"><h2><a name="a2._Generate_a_highlight_XML_document"></a>2. Generate a highlight XML document</h2>
+<p>
+          Acrobat also allows you to tell it to highlight specific words in the PDF document.  It does this by passing an XML document to
+          Acrobat when opening the PDF.
+          See the <a href="http://partners.adobe.com/public/developer/en/pdf/HighlightFileFormat.pdf" class="externalLink">PDF Highlight File Format</a>
+          for more detailed documentation.  <br />
+<br />
+
+          Basically the document allows you to tell it the characters to highlight in the PDF by using character
+          offsets on a page.  As this is just an XML document, there are many ways you could create it but PDFBox does have a utility to make it
+          easier.  Take a look at the javadoc for the <a href="../apidocs/org/apache/pdfbox/util/PDFHighlighter.html">PDFHighlighter</a> class.  This will
+          allow you to specify a set of words that you want to have highlighted and generate the XML document for you.  <br />
+<br />
+
+          PDFBox also ships with a complete
+          web application example of using this class, take a look at the pdfbox.war directory in your PDFBox installation.
+          <br />
+
+          You pass the xml to acrobat through a URL (or command line) parameter like this:
+          <a href="http://pdfbox.apache.org/userguide/text_extraction.pdf#xml=http://pdfbox.apache.org/highlight.xml" class="externalLink">http://pdfbox.apache.org/userguide/text_extraction.pdf#xml=http://pdfbox.apache.org/highlight.xml</a><br />
+<note>The value of the xml parameter must be a full URL to the XML document.  <br />
+
+          http://pdfbox.apache.org/userguide/text_extraction.pdf#xml=highlight.xml will not work<br />
+
+          http://pdfbox.apache.org/userguide/text_extraction.pdf#xml=http://pdfbox.apache.org/highlight.xml is correct!</note><br />
+The one drawback to this solution is that you must parse the PDF and then generate an XML document, which is a time-consuming operation.
+          </p>
+</div>
+<div class="section"><h2><a name="a3._Alter_pdf_contents_to_highlight_specific_text"></a>3. Alter pdf contents to highlight specific text</h2>
+<p>
+          Using PDFBox it is possible to regenerate the appearance stream to add highlighting to specific areas.  While this is possible,
+          it will require recreating a new PDF for every search request.  There is nothing prebuilt in PDFBox to do this automatically for you
+          and will require a significant coding effort.<br />
+<br />
+
+          You would need to
+          <ol type="1"><li>Find all locations of the text, determine x/y coordinates, width/height</li>
+<li>Regenerate the PDF appearance stream and draw a highlighted box behind the text.  Yellow would be easiest, if you want an inverted black/white, then you would need to change the color of the text to be white and draw a black box.</li>
+<li>Stream the PDF back to the user</li>
+</ol>
+
+          This is the most flexible but is also the most work to implement and is also more resource intensive.
+          </p>
+</div>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/highlighting.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/index.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/index.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/index.html (added)
+++ pdfbox/site/publish/userguide/index.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,399 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - User Guide</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+              <strong>Index</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <div class="section"><h2><a name="PDFBox_User_Guide"></a>PDFBox User Guide</h2>
+<p>
+        	This page will discuss the internals of PDF documents
+        	and how those internals map to PDFBox classes.
+        	Users should reference the javadoc to see what classes and methods are available.  The
+        	<a href="http://partners.adobe.com/public/developer/pdf/index_reference.html" class="externalLink">Adobe PDF Reference</a>
+        	can be used to determine detailed information about fields and their meanings.
+        </p>
+</div>
+<div class="section"><h2><a name="Examples"></a>Examples</h2>
+<p>A variety of examples can be found in the
+        src/main/java/org/apache/pdfbox/examples folder.
+        This guide will refer to specific examples as needed.
+        </p>
+</div>
+<div class="section"><h2><a name="PDF_File_Format_Overview"></a>PDF File Format Overview</h2>
+<p>
+      A PDF document is a stream of basic object types.  The low level objects are represented in PDFBox
+      in the <i>org.apache.pdfbox.cos</i> package.  The basic types in a PDF are:
+      </p>
+<table class="bodyTable"><tr class="a"><th>PDF Type</th>
+<th>Description</th>
+<th>Example</th>
+<th>PDFBox class</th>
+</tr>
+<tr class="b"><td>Array</td>
+<td>An ordered list of items</td>
+<td>[1 2 3]</td>
+<td>org.apache.pdfbox.cos.COSArray</td>
+</tr>
+<tr class="a"><td>Boolean</td>
+<td>Standard True/False values</td>
+<td>true</td>
+<td>org.apache.pdfbox.cos.COSBoolean</td>
+</tr>
+<tr class="b"><td>Dictionary</td>
+<td>A map of name value pairs</td>
+<td>&lt;&lt;<br />
+
+      		    /Type /XObject<br />
+
+      		    /Name (Name)<br />
+
+      		    /Size 1<br />
+
+      		    &gt;&gt;
+      		 </td>
+<td>org.apache.pdfbox.cos.COSDictionary</td>
+</tr>
+<tr class="a"><td>Number</td>
+<td>Integer and Floating point numbers</td>
+<td>1 2.3</td>
+<td>org.apache.pdfbox.cos.COSFloat<br />
+org.apache.pdfbox.cos.COSInteger</td>
+</tr>
+<tr class="b"><td>Name</td>
+<td>A predefined value in a PDF document, typically used as a key in a dictionary</td>
+<td>/Type</td>
+<td>org.apache.pdfbox.cos.COSName</td>
+</tr>
+<tr class="a"><td>Object</td>
+<td>A wrapper to any of the other objects, this can be used to reference an object multiple times.
+      		  An object is referenced by using two numbers, an object number and a generation number.  Initially
+      		  the generation number will be zero unless the object got replaced later in the stream.
+      		</td>
+<td>12 0 obj &lt;&lt; /Type /XObject &gt;&gt; endobj</td>
+<td>org.apache.pdfbox.cos.COSObject</td>
+</tr>
+<tr class="b"><td>Stream</td>
+<td>A stream of data, typically compressed.  This is used for page contents, images and
+      		embedded font streams.
+      		</td>
+<td>12 0 obj &lt;&lt; /Type /XObject &gt;&gt; stream 030004040404040404 endstream</td>
+<td>org.apache.pdfbox.cos.COSStream</td>
+</tr>
+<tr class="a"><td>String</td>
+<td>A sequence of characters
+      		</td>
+<td>(This is a string)</td>
+<td>org.apache.pdfbox.cos.COSString</td>
+</tr>
+</table>
+<p>
+      A page in a pdf document is represented with a COSDictionary.  The entries that are available for
+      a page can be seen in the PDF Reference and an example of a page looks like this:
+      </p>
+<table class="bodyTable"><tr class="b"><td><pre>
+&lt;&lt;
+    /Type /Page
+    /MediaBox [0 0 612 915]
+    /Contents 56 0 R
+&gt;&gt;</pre></td>
+</tr>
+</table>
+<p>Some Java code to access fields</p>
+<table class="bodyTable"><tr class="a"><td><pre>COSDictionary page = ...;
+COSArray mediaBox = (COSArray)page.getDictionaryObject( &quot;MediaBox&quot; );
+System.out.println( &quot;Width:&quot; + mediaBox.get( 2 ) );
+</pre></td>
+</tr>
+</table>
+</div>
+<div class="section"><h2><a name="PD_Model"></a>PD Model</h2>
+<p>The COS Model allows access to all aspects of a PDF document.  This type of programming is
+    	tedious and error prone though because the user must know all of the names of the parameters
+    	and no helper methods are available.  The PD Model was created to help alleviate this problem.
+    	Each type of object(page, font, image) has a set of defined attributes that can be available
+    	in the dictionary.  A PD Model class is available for each of these so that strongly typed
+    	methods are available to access the attributes.  The same code from above to get the page width
+    	can be rewritten to use PD Model classes.
+    	</p>
+<table class="bodyTable"><tr class="b"><td><pre>PDPage page = ...;
+PDRectangle mediaBox = page.getMediaBox();
+System.out.println( &quot;Width:&quot; + mediaBox.getWidth() );</pre></td>
+</tr>
+</table>
+<p>PD Model objects sit on top of COS model.  Typically, the classes in the PD Model
+         will only store a COS object and all setter/getter methods will modify data that
+         is stored in the COS object.  For example, when you call PDPage.getLastModified() the method
+         will do a lookup in the COSDictionary with the key &quot;LastModified&quot;, if it is found the value is
+         then converted to a java.util.Calendar.  When PDPage.setLastModified( Calendar ) is called
+         then the Calendar is converted to a string in the COSDictionary.
+      </p>
+<p>Here is a visual depiction of the COS Model and PD Model design.</p>
+<center><img src="../images/cos-pdmodel%20diagram.png" alt="Diagram of the COS Model and PD Model design" /></center><p>
+      This design presents many advantages and disadvantages.<br />
+<br />
+<b>Advantages:</b></p>
+<ul><li>Simple, easy to use API.</li>
+<li>Underlying document automatically gets updated when you update the PD Model</li>
+<li>Ability to easily access the COS Model from any PD Model object</li>
+<li>Easily add to and update existing PDF documents</li>
+</ul>
+<p><b>Disadvantages:</b></p>
+<ul><li>Object caching is not done in the PD Model classes
+      	    <note>For example, each call to PDPage.getMediaBox() will return a new PDRectangle
+      	    object, but will contain the same underlying COSArray.</note></li>
+</ul>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/index.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/metadata.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/metadata.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/metadata.html (added)
+++ pdfbox/site/publish/userguide/metadata.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,295 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - PDF Metadata</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+              <strong>Metadata</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <meta name="keywords"></meta><div class="section"><h2><a name="Accessing_PDF_Metadata"></a>Accessing PDF Metadata</h2>
+<p>
+      See class:<a href="../apidocs/org/apache/pdfbox/pdmodel/common/PDMetadata.html">org.apache.pdfbox.pdmodel.common.PDMetadata</a><br />
+
+      See example:<a href="../apidocs/org/apache/pdfbox/examples/pdmodel/AddMetadataFromDocInfo.html">AddMetadataFromDocInfo</a><br />
+
+      See Adobe Documentation:<a href="http://partners.adobe.com/public/developer/en/xmp/sdk/xmpspecification.pdf" class="externalLink">XMP Specification</a></p>
+<p>
+      	PDF documents can have XML metadata associated with certain objects within a PDF document.  For example, the following PD Model objects
+      	have the ability to contain metadata:
+     </p>
+<ul><li>PDDocumentCatalog</li>
+<li>PDPage</li>
+<li>PDXObject</li>
+<li>PDICCBased</li>
+<li>PDStream</li>
+</ul>
+<p>The metadata that is stored in PDF objects conforms to the XMP specification; it is recommended that you review that specification.
+        Currently there is no high level API for managing the XML metadata, PDFBox uses standard java InputStream/OutputStream
+        to retrieve or set the XML metadata.  For example:</p>
+<div class="source"><pre>
+      PDDocument doc = PDDocument.load( ... );
+      PDDocumentCatalog catalog = doc.getDocumentCatalog();
+      PDMetadata metadata = catalog.getMetadata();
+
+      //to read the XML metadata
+      InputStream xmlInputStream = metadata.createInputStream();
+
+      //or to write new XML metadata
+      InputStream newXMPData = ...;
+      PDMetadata newMetadata = new PDMetadata(doc, newXMPData, false );
+      catalog.setMetadata( newMetadata );
+      </pre>
+</div>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/metadata.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/redistribution.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/redistribution.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/redistribution.html (added)
+++ pdfbox/site/publish/userguide/redistribution.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,315 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - PDFBox - Redistribution</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+              <strong>Redistribution</strong>
+        </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/text_extraction.html">Text Extraction</a>
+          </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <meta name="keywords"></meta><div class="section"><h2><a name="Redistributing_PDFBox"></a>Redistributing PDFBox</h2>
+<p>
+      PDFBox makes use of several open source libraries.  Some are just required for building PDFBox, some are required for running PDFBox.
+      The table below summarizes the licenses that are included with PDFBox and when they are required.
+      </p>
+<table class="bodyTable"><tr class="a"><th>Product (with license link)</th>
+<th>Used for</th>
+<th>Required for PDFBox redistribution</th>
+</tr>
+<tr class="b"><td><a href="http://www.adobe.com/devnet/font/#pcfi" class="externalLink">Adobe AFM</a></td>
+<td>Resource files for extracting font encoding.  Bundled inside the PDFBox jar file</td>
+<td>Yes</td>
+</tr>
+<tr class="a"><td><a href="http://www.adobe.com/devnet/font/#pcfi" class="externalLink">Adobe CMap</a></td>
+<td>Resource files for CJK font mapping.  Bundled inside the PDFBox jar file</td>
+<td>Yes</td>
+</tr>
+<tr class="b"><td><a href="http://www.adobe.com/devnet/opentype/archives/glyph.html" class="externalLink">Adobe Glyphlist</a></td>
+<td>Mapping for the computation of a Unicode character string from a sequence of glyphs.  Bundled inside the PDFBox jar file</td>
+<td>Yes</td>
+</tr>
+<tr class="a"><td><a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">Apache Ant</a></td>
+<td>Tool for building PDFBox</td>
+<td>No</td>
+</tr>
+<tr class="b"><td><a href="http://www.bouncycastle.org/licence.html" class="externalLink">bouncycastle</a></td>
+<td>Encryption libraries for encrypting/decrypting PDF documents</td>
+<td>Yes</td>
+</tr>
+<tr class="a"><td><a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">FontBox (incubating)</a></td>
+<td>Font Library</td>
+<td>Yes</td>
+</tr>
+<tr class="b"><td><a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">JempBox (incubating)</a></td>
+<td>Library for working with XMP metadata.</td>
+<td>Yes</td>
+</tr>
+<tr class="a"><td><a href="http://weblog.ikvm.net/story.aspx/license" class="externalLink">IKVM</a></td>
+<td>Library for .NET version of PDFBox</td>
+<td>Only if using the .NET version (the DLLs in /bin) of PDFBox</td>
+</tr>
+<tr class="b"><td><a href="http://junit.sourceforge.net/cpl-v10.html" class="externalLink">junit</a></td>
+<td>Testing framework used in development</td>
+<td>No</td>
+</tr>
+<tr class="a"><td><a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">Apache Lucene</a></td>
+<td>Text search engine library.  PDFBox provides simple integration with Lucene.</td>
+<td>Optional, only if using Lucene</td>
+</tr>
+<tr class="b"><td><a href="http://source.icu-project.org/repos/icu/icu/trunk/license.html" class="externalLink">ICU4J</a></td>
+<td>Normalizing right to left text.</td>
+<td>Optional, only if extracting right to left text</td>
+</tr>
+</table>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/redistribution.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: pdfbox/site/publish/userguide/text_extraction.html
URL: http://svn.apache.org/viewvc/pdfbox/site/publish/userguide/text_extraction.html?rev=910660&view=auto
==============================================================================
--- pdfbox/site/publish/userguide/text_extraction.html (added)
+++ pdfbox/site/publish/userguide/text_extraction.html Tue Feb 16 19:37:14 2010
@@ -0,0 +1,367 @@
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+
+
+
+
+
+
+
+
+
+
+
+<html xmlns="http://www.w3.org/1999/xhtml">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <title>Apache PDFBox - Java PDF Library, pdftotext, PDF to text, java pdf text extraction</title>
+    <style type="text/css" media="all">
+      @import url("../css/maven-base.css");
+      @import url("../css/maven-theme.css");
+      @import url("../css/site.css");
+    </style>
+    <link rel="stylesheet" href="../css/print.css" type="text/css" media="print" />
+          </head>
+  <body class="composite">
+    <div id="banner">
+                  <a href="" id="bannerLeft">
+    
+                                            <img src="../images/Logo.gif" alt="Apache PDFBox" />
+    
+            </a>
+                        <a href="http://www.apache.org/" id="bannerRight">
+    
+                                    <img src="http://www.apache.org/images/asf_logo.gif" alt="The Apache Software Foundation" />
+    
+            </a>
+            <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="breadcrumbs">
+          
+  
+
+  
+    
+            
+  
+    
+              <div class="xright">      
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+    <div id="leftColumn">
+      <div id="navcolumn">
+           
+  
+
+  
+    
+            
+  
+    
+                   <h5>About</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../index.html">Welcome</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../download.html">Download</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/licenses/LICENSE-2.0" class="externalLink">License</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../mailing-list.html">Mailing Lists</a>
+          </li>
+              
+    <li class="none">
+                    <a href="https://issues.apache.org/jira/browse/PDFBOX" class="externalLink">Issue Tracker</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../references.html">References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/sponsorship.html" class="externalLink">ASF Sponsorship Program</a>
+          </li>
+              
+    <li class="none">
+                    <a href="http://www.apache.org/foundation/thanks.html" class="externalLink">ASF Thanks</a>
+          </li>
+          </ul>
+              <h5>Command Line Utilities</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Decrypt.html">Decrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/Encrypt.html">Encrypt</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ExtractText.html">ExtractText</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PDFToImage.html">PDFToImage</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/PrintPDF.html">PrintPDF</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/ConvertColorspace.html">ConvertColorspace</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../commandlineutilities/TextToPDF.html">TextToPDF</a>
+          </li>
+          </ul>
+              <h5>Developers Guide</h5>
+            <ul>
+              
+    <li class="none">
+                    <a href="../userguide/index.html">Index</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/bookmarks.html">Bookmarks</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/building_pdfbox.html">Building PDFBox</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/faq.html">FAQ</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/file_references.html">File References</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/fonts.html">Fonts</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/highlighting.html">Highlighting</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/metadata.html">Metadata</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/redistribution.html">Redistribution</a>
+          </li>
+              
+    <li class="none">
+                    <a href="../userguide/dot_net.html">.NET Version</a>
+          </li>
+              
+    <li class="none">
+              <strong>Text Extraction</strong>
+        </li>
+          </ul>
+              <h5>Project Documentation</h5>
+            <ul>
+              
+                
+              
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+            
+      
+              
+        <li class="collapsed">
+                    <a href="../project-info.html">Project Information</a>
+                </li>
+          </ul>
+                                           <a href="http://maven.apache.org/" title="Built by Maven" class="poweredBy">
+            <img alt="Built by Maven" src="../images/logos/maven-feather.png"></img>
+          </a>
+                       
+  
+
+  
+    
+            
+  
+    
+        </div>
+    </div>
+    <div id="bodyColumn">
+      <div id="contentBox">
+        <meta name="keywords"></meta><div class="section"><h2><a name="Extracting_Text"></a>Extracting Text</h2>
+<p>
+            See class:<a href="../apidocs/org/apache/pdfbox/util/PDFTextStripper.html">org.apache.pdfbox.util.PDFTextStripper</a><br />
+
+            See class:<a href="../apidocs/org/apache/pdfbox/searchengine/lucene/LucenePDFDocument.html">org.apache.pdfbox.searchengine.lucene.LucenePDFDocument</a><br />
+
+            See command line app:<a href="../commandlineutilities/ExtractText.html">ExtractText</a><br />
+</p>
+<p>
+            One of the main features of PDFBox is its ability to quickly and accurately extract text from a variety of PDF documents.
+            This functionality is encapsulated in the <a href="../apidocs/org/apache/pdfbox/util/PDFTextStripper.html">org.apache.pdfbox.util.PDFTextStripper</a> and
+            can be easily executed on the command line with <a href="../apidocs/org/apache/pdfbox/ExtractText.html">org.apache.pdfbox.ExtractText</a>.
+        </p>
+<div class="section"><h2><a name="Lucene_Integration"></a>Lucene Integration</h2>
+<p><a href="http://lucene.apache.org/java/docs/index.html" class="externalLink">Lucene</a> is an open source text search library from the Apache Jakarta Project.
+          In order for Lucene to be able to index a PDF document it must first be converted to text.  PDFBox provides a simple approach for adding
+          PDF documents into a Lucene index.</p>
+<div class="source"><pre>
+          Document luceneDocument = LucenePDFDocument.getDocument( ... );
+          </pre>
+</div>
+<p>
+          Now that you have a Lucene Document object, you can add it to the Lucene index just like you would if it had been
+          created from a text or HTML file.
+          The <a href="../apidocs/org/apache/pdfbox/searchengine/lucene/LucenePDFDocument.html">LucenePDFDocument</a> automatically extracts
+          a variety of metadata fields from the PDF to be added to the index; the javadoc shows details on those fields.
+          This approach is very simple and should be sufficient for most users; if not, you can use some of the advanced text extraction
+          techniques described in the next section.
+          </p>
+</div>
+<div class="section"><h2><a name="Advanced_Text_Extraction"></a>Advanced Text Extraction</h2>
+<p>Some applications will have complex text extraction requirements and neither the command line application nor the LucenePDFDocument
+            will be able to fulfill those requirements.  It is possible for users to utilize or extend the
+            <a href="../apidocs/org/apache/pdfbox/util/PDFTextStripper.html">PDFTextStripper</a> class to meet some of these requirements.</p>
+<div class="section"><h2><a name="Limiting_The_Extracted_Text"></a>Limiting The Extracted Text</h2>
+<p>
+                There are several ways that we can limit the text that is extracted during the extraction process.  The simplest is to
+                specify the range of pages that you want to be extracted.  For example, to only extract text from the second and third pages
+                of the PDF document you could do this:
+                </p>
+<div class="source"><pre>
+            PDFTextStripper stripper = new PDFTextStripper();
+            stripper.setStartPage( 2 );
+            stripper.setEndPage( 3 );
+            stripper.writeText( ... );
+        </pre>
+</div>
+<note>The startPage and endPage properties of PDFTextStripper are 1 based and inclusive.</note><p>If you wanted to start on page 2 and extract to the end of the document then you would just set the startPage property.
+                By default all pages in the pdf document are extracted.</p>
+<p>It is also possible to limit the extracted text to be between two bookmarks in the page.  If you are not familiar with
+                how to use bookmarks in PDFBox then you should review the <a href="bookmarks.html">Bookmarks</a> page.  Similar to the startPage/endPage
+                properties, PDFTextStripper also has startBookmark/endBookmark properties.  There are some caveats to be aware of when using this
+                feature of the PDFTextStripper.  Not all bookmarks point to a page in the current PDF document.  The possible states of a bookmark are:</p>
+<ul><li>null - The property was not set, this is the default.</li>
+<li>Points to page in the PDF - The property was set and points to a valid page in the PDF</li>
+<li>Bookmark does not point to anything - The property was set but the bookmark does not point to any page</li>
+<li>Bookmark points to external action - The property was set, but it points to a page in a different PDF or performs an action when activated</li>
+</ul>
+<p>The table below will describe how PDFBox behaves in the various scenarios:</p>
+<table class="bodyTable"><tr class="a"><th>Start Bookmark</th>
+<th>End Bookmark</th>
+<th>Result</th>
+</tr>
+<tr class="b"><td>null</td>
+<td>null</td>
+<td>This is the default, the properties have no effect on the text extraction.</td>
+</tr>
+<tr class="a"><td>Points to page in the PDF</td>
+<td>null</td>
+<td>Text extraction will begin on the page that this bookmark points to and go until the end of the document.</td>
+</tr>
+<tr class="b"><td>null</td>
+<td>Points to page in the PDF</td>
+<td>Text extraction will begin on the first page and stop at the end of the page that this bookmark points to.</td>
+</tr>
+<tr class="a"><td>Bookmark does not point to anything</td>
+<td>null</td>
+<td>Because the PDFTextStripper cannot determine a start page based on the bookmark, it will start on the first page and go until
+                            the end of the document.</td>
+</tr>
+<tr class="b"><td>null</td>
+<td>Bookmark does not point to anything</td>
+<td>Because the PDFTextStripper cannot determine an end page based on the bookmark, it will start on the first page and go until
+                            the end of the document.</td>
+</tr>
+<tr class="a"><td>Bookmark does not point to anything</td>
+<td>Bookmark does not point to anything</td>
+<td>This is a special case!  If the startBookmark and endBookmark are exactly the same then no text will be extracted.  If
+                            they are different then it is not possible for the PDFTextStripper to determine the pages so it will include the
+                            entire document.</td>
+</tr>
+<tr class="b"><td>Bookmark points to external action</td>
+<td>Bookmark points to external action</td>
+<td>If either the startBookmark or the endBookmark refer to an external page or execute an action then an OutlineNotLocalException
+                        will be thrown to indicate to the user that the bookmark is not valid.</td>
+</tr>
+</table>
+<note>PDFTextStripper will check both the startPage/endPage and the startBookmark/endBookmark to determine if text should
+                      be extracted from the current page.</note></div>
+<div class="section"><h2><a name="External_Glyph_List"></a>External Glyph List</h2>
+<p>Some PDF files need to map between glyph names and Unicode values during text extraction.  PDFBox comes with an <a href="http://partners.adobe.com/public/developer/en/opentype/glyphlist.txt" class="externalLink">Adobe Glyph List</a>, but you may encounter files with glyph names that are not in that map. To use  your own glyphlist file, supply the file name to the <tt>glyphlist_ext</tt> JVM property. </p>
+</div>
+<div class="section"><h2><a name="Right_to_Left_Text"></a>Right to Left Text</h2>
+<p>Extracting text in languages whose text goes from right to left (such as Arabic and Hebrew) in PDF files can result in text that is backwards.  PDFBox can normalize and reverse the text if the <a href="http://icu-project.org/" class="externalLink">ICU4J</a> jar file has been placed on the classpath (it is an optional dependency). Note that you should also enable sorting with either <a href="../apidocs/org/apache/pdfbox/util/PDFTextStripper.html">org.apache.pdfbox.util.PDFTextStripper</a> or <a href="../apidocs/org/apache/pdfbox/ExtractText.html">org.apache.pdfbox.ExtractText</a> to ensure accurate output.</p>
+</div>
+</div>
+</div>
+
+      </div>
+    </div>
+    <div class="clear">
+      <hr/>
+    </div>
+    <div id="footer">
+      <div class="xright">&#169;  
+          2008-2010
+    
+          The Apache Software Foundation
+          
+  
+
+  
+    
+            
+  
+    
+  </div>
+      <div class="clear">
+        <hr/>
+      </div>
+    </div>
+  </body>
+</html>

Propchange: pdfbox/site/publish/userguide/text_extraction.html
------------------------------------------------------------------------------
    svn:eol-style = native



Mime
View raw message