commons-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Apache Wiki <wikidi...@apache.org>
Subject [Commons Wiki] Update of "ExtractAndDecompressGzipFiles" by KenTanaka
Date Tue, 06 Nov 2007 22:20:49 GMT
Dear Wiki user,

You have subscribed to a wiki page or wiki category on "Commons Wiki" for change notification.

The following page has been changed by KenTanaka:
http://wiki.apache.org/jakarta-commons/ExtractAndDecompressGzipFiles

The comment on the change is:
Added main code examples

------------------------------------------------------------------------------
  
  There should be a cleaner, more direct route. Maybe someone more familiar with VFS can post
better code.
  
- This example uses Maven2. There is a pom.xml to define the project
+ This example uses Maven2. There is a '''{{{pom.xml}}}''' to define the project
  
+ {{{
+ <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+     <modelVersion>4.0.0</modelVersion>
+     <groupId>gov.noaa.eds</groupId>
+     <artifactId>tryVfs</artifactId>
+     <packaging>jar</packaging>
+     <version>1.0-SNAPSHOT</version>
+     <name>Try apache commons vfs</name>
+     <url>http://maven.apache.org</url>
+     <build>
+         <plugins>
+             <plugin>
+                 <groupId>org.apache.maven.plugins</groupId>
+                 <artifactId>maven-compiler-plugin</artifactId>
+                 <configuration>
+                     <source>1.5</source>
+                     <target>1.5</target>
+                 </configuration>
+             </plugin>
+             <plugin>
+                 <!-- Usage: mvn assembly:assembly -->
+                 <artifactId>maven-assembly-plugin</artifactId>
+                 <configuration>
+                     <descriptorRefs>
+                         <descriptorRef>jar-with-dependencies</descriptorRef>
+                     </descriptorRefs>
+                     <archive>
+                         <manifest>
+                             <mainClass>gov.noaa.eds.tryVfs.MultiStep</mainClass>
+                         </manifest>
+                     </archive>
+                 </configuration>
+             </plugin>
+         </plugins>
+     </build>
+     <dependencies>
+         <dependency>
+             <groupId>commons-vfs</groupId>
+             <artifactId>commons-vfs</artifactId>
+             <version>1.0</version>
+         </dependency>
+         <dependency>
+             <groupId>junit</groupId>
+             <artifactId>junit</artifactId>
+             <version>3.8.1</version>
+             <scope>test</scope>
+         </dependency>
+     </dependencies>
+ </project>
+ }}}
+ 
+ Content of '''{{{src/main/java/gov/noaa/eds/tryVfs/MultiStep.java}}}'''
+ {{{
+ package gov.noaa.eds.tryVfs;
+ 
+ import org.apache.commons.vfs.AllFileSelector;
+ import org.apache.commons.vfs.FileName;
+ import org.apache.commons.vfs.FileObject;
+ import org.apache.commons.vfs.FileSystemException;
+ import org.apache.commons.vfs.FileSystemManager;
+ import org.apache.commons.vfs.FileType;
+ import org.apache.commons.vfs.FileTypeSelector;
+ import org.apache.commons.vfs.VFS;
+ import org.apache.commons.vfs.provider.local.LocalFile;
+ 
+ /**
+  * Try using VFS to read the content of a compressed (gz) file inside of
+  * a tar file. Extract tar file objects. If they are gzip files, decompress them.
+  * Any directory structure in the tarfile is not being preserved, the contents
+  * are pulled out to the same location regardless of directory hierarchy (for
+  * the purposes of this example, all objects in the tar file have unique names,
+  * so there are no file name conflicts).
+  *
+  * Use a multiple step approach.
+  * 1. extract gzipped file from tar file
+  * 2. decompress gzipped content to a temporary directory
+  * 3. move decompressed content to desired destination
+  * 4. remove temporary directory
+  * 5. remove gzipped file
+  *
+  * There should be a cleaner more direct route, but I haven't discovered it yet.
+  */
+ public class MultiStep {
+     FileSystemManager fsManager = null;
+     static String extractDirname = "/extra/data/tryVfs";
+     LocalFile extractDir = null;
+     
+     /**
+      * Extract files from a tar file. If the file extracted is gzipped,
+      * decompress it and remove the gzipped version.
+      * @param args command line arguments are currently not used
+      */
+     public static void main( String[] args ) {
+         MultiStep msExtract = new MultiStep();
+         
+         try {
+             msExtract.fsManager = VFS.getManager();
+         } catch (FileSystemException ex) {
+             throw new RuntimeException("failed to get fsManager from VFS", ex);
+         }
+         
+         try {
+             msExtract.extractDir = (LocalFile) msExtract.fsManager.resolveFile("file://"
+                     + extractDirname);
+             if (! msExtract.extractDir.exists()) {
+                 msExtract.extractDir.createFolder();
+             }
+         } catch (FileSystemException ex) {
+             throw new RuntimeException("failed to prepare extract directory " 
+                     + extractDirname, ex);
+         }
+         
+         
+         /* Create a tarFile object */
+         FileObject tarFile;
+         try {
+             System.out.println("Resolve tar file:");
+             tarFile = msExtract.fsManager.resolveFile(
+                     "tar:/extra/data/tryVfs/archive.tar");
+             
+             FileName tarFileName = tarFile.getName();
+             System.out.println("  Path     : " + tarFileName.getPath());
+             System.out.println("  URI      : " + tarFileName.getURI());
+         } catch (Exception ex) {
+             throw new RuntimeException("failed to open tar file ", ex);
+         }
+         
+         /* Work on files inside tarFile */
+         FileObject[] children;
+         try {
+             children = tarFile.getChildren();
+         } catch (FileSystemException ex) {
+             throw new RuntimeException("failed to get contents of tarfile ", ex);
+         }
+         
+         for (FileObject f : children) {
+             msExtract.processChild(f);
+         }
+         
+     } // main( String[] args )
+     
+     private void processChild(FileObject f) {
+         try {
+             if (f.getType() == FileType.FOLDER) {
+                 // Recursively process files in this folder
+                 FileObject[] children = f.getChildren();
+                 for (FileObject subfile : children) {
+                     processChild(subfile);
+                 }
+             } else {
+                 FileName fname = f.getName();
+                 String extractName = new String(this.extractDir.getName() + "/"
+                         + fname.getBaseName());
+                 System.out.println("Extracting " + extractName);
+                 LocalFile extractFile = (LocalFile) this.fsManager.resolveFile(extractName);
+                 extractFile.copyFrom(f, new AllFileSelector());
+                 
+                 // if the file is gzipped, decompress it
+                 if (extractFile.getName().getExtension().equals("gz")) {
+                     System.out.println("Decompressing " + extractName);
+                     String gzName = new String("gz://" + extractFile.getName().getPath());
+                     System.out.println("gzName=" + gzName);
+                     FileObject gzFile = this.fsManager.resolveFile(gzName);
+                     String fileName = extractFile.getName().getBaseName().replaceAll(".gz$",
"");
+                     
+                     // The decompressed path we want
+                     String decompName = new String(this.extractDir.getName() + "/" 
+                             + fileName);
+                     
+                     // A temporary Directory
+                     String tmpDirname = new String(this.extractDir.getName() + "/" 
+                             + fileName + ".tmp");
+                     
+                     // A temporary file path
+                     String tmpFilename = new String(tmpDirname + "/" + fileName);
+                     
+                     // Some debug lines
+                     System.out.println("fileName   =" + fileName);
+                     System.out.println("decompName =" + decompName);
+                     System.out.println("tmpDirname =" + tmpDirname);
+                     System.out.println("tmpFilename=" + tmpFilename);
+                     
+                     // Extracting from gzip file ends up with a directory containing what
+                     // we want.
+                     LocalFile tmpDir = (LocalFile) this.fsManager.resolveFile(tmpDirname);
+                     tmpDir.copyFrom(gzFile, new FileTypeSelector(FileType.FILE));
+                     
+                     // Move the uncompressed file to the location desired.
+                     LocalFile tmpFile = (LocalFile) this.fsManager.resolveFile(tmpFilename);
+                     LocalFile decompFile = (LocalFile) this.fsManager.resolveFile(decompName);
+                     tmpFile.moveTo(decompFile);
+                     
+                     // Delete the temporary directory.
+                     tmpDir.delete(new AllFileSelector());
+                     
+                     // Delete the gzip file now that we have the uncompressed version.
+                     // Note that the plain file FileObject (extractFile) is used 
+                     // for deleting instead of the gzip FileObject (gzFile).
+                     extractFile.delete(new AllFileSelector());
+                 }
+             }
+         } catch (FileSystemException ex) {
+             ex.printStackTrace();
+             throw new RuntimeException("Error working on tarfile object " + f.getName());
+         }
+     } // processChild(FileObject f)
+ }
+ 
+ }}}
+ 

---------------------------------------------------------------------
To unsubscribe, e-mail: dev-unsubscribe@commons.apache.org
For additional commands, e-mail: dev-help@commons.apache.org


Mime
View raw message