commons-user mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Albretch Mueller <lbrt...@gmail.com>
Subject [compress] trying to feed data from tar.gz and tar.bz2 files directly to an XMLReader ...
Date Mon, 27 Jun 2011 07:40:23 GMT
~
 Feeding data contained in zip files directly to an XMLReader
(including schema validation) is very simple (see the example code
below). This matters because you may get huge data sets that you do not
want to unzip locally, or do not have enough hard drive space to unzip.
~
 I don't see how you can do that for tar.gz and tar.bz2 files. In those
cases it seems you have no other way but to decompress the archive first
and process its entries afterwards ...
~
 How can you do with tarballs what you do with zip files? Am I
missing something here?
~
 thank you
 lbrtchx
~
 toy code (verbatim) to show what I mean with the zip format:
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~
import javax.xml.validation.*;
import javax.xml.parsers.*;
import javax.xml.validation.*;
import javax.xml.transform.*;
import javax.xml.transform.stream.*;

import org.xml.sax.*;
import org.xml.sax.ext.*;

import java.io.*;
import java.util.*;
import java.util.zip.*;

/**
 * Zipped XML Reader: feeds every XML document stored in a zip archive straight
 * to a schema-validating SAX {@link XMLReader}, without first extracting the
 * archive to disk.
 *
 * Usage: {@code java ZXMLRdr02Test [schema.xsd [archive.zip]]}. Arguments that
 * are omitted fall back to the built-in default paths.
 */
public class ZXMLRdr02Test{

// __ default inputs (large xml files from http://download.wikimedia.org/enwiki/latest/)
 private static final String DEFAULT_XSD_FILE =
  "/media/sda1/prjx/sw/text_synch/XML_Fls/export-0.5.xsd";
 private static final String DEFAULT_XML_ZIP_FILE =
  "/media/sda1/prjx/sw/text_synch/XML_Fls/enwiki-latest-stub-articles27.xml.zip";

 /**
  * Parses each file entry of the zip archive with a reader configured for the
  * given XML schema. Failures are reported on {@code System.err}; the method
  * itself does not throw.
  *
  * @param args optional: {@code args[0]} is the path of the XSD file,
  *             {@code args[1]} the path of the zip archive
  */
 public static void main(String[] args){

  String aIXSDFl = (args != null && args.length > 0) ? args[0] : DEFAULT_XSD_FILE;
  String aIXMLFl = (args != null && args.length > 1) ? args[1] : DEFAULT_XML_ZIP_FILE;

  ZipFile ZFl = null;
  try{
   XMLReader XMLRdr = newSchemaAwareReader(aIXSDFl);
// __ Feeding data from zipped files directly to the XMLReader
// __ (without unzipping it to an output file)
   ZFl = new ZipFile(new File(aIXMLFl), ZipFile.OPEN_READ);
   Enumeration<? extends ZipEntry> ZENum = ZFl.entries();
   while (ZENum.hasMoreElements()) {
    ZipEntry ZEntry = ZENum.nextElement();
// __ a directory entry carries no document; parsing it would abort the scan
    if (ZEntry.isDirectory()){ continue; }
    System.out.println("// __ XML Scanning: |" + ZEntry.getName() + "|");
    InputStream IS = ZFl.getInputStream(ZEntry);
    try{
// __ hand the parser the raw bytes: given a character stream it would
// __ disregard the BOM / encoding declaration of the document
     XMLRdr.parse(new InputSource(IS));
    }finally{
     IS.close();
    }
   }
  }catch(IOException IOX){ IOX.printStackTrace(System.err); }
   catch(ParserConfigurationException PrsConfX){ PrsConfX.printStackTrace(System.err); }
   catch(SAXException SAXX){ SAXX.printStackTrace(System.err); }
  finally{
// __ release the archive on the failure paths as well
   if (ZFl != null){
    try{ ZFl.close(); }
    catch(IOException IOX){ IOX.printStackTrace(System.err); }
   }
  }
 }

 /**
  * Builds a namespace-aware XMLReader that checks documents against the given
  * W3C XML Schema and reports content to a {@link DefaultHandler2}.
  *
  * @param aIXSDFl path of the XSD file
  * @return the configured reader
  * @throws ParserConfigurationException if no matching SAX parser can be created
  * @throws SAXException if the schema cannot be loaded or the parser set up
  */
 private static XMLReader newSchemaAwareReader(String aIXSDFl)
   throws ParserConfigurationException, SAXException{
  SAXParserFactory SAXFctry = SAXParserFactory.newInstance();
// __ stop standard (DTD) validation to allow custom schema
  SAXFctry.setValidating(false);
  SAXFctry.setNamespaceAware(true);
  SchemaFactory SchmFctry =
   SchemaFactory.newInstance(javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI);
// __ StreamSource(File) derives a proper system id (URI) from the path
  SAXFctry.setSchema(SchmFctry.newSchema(
   new Source[] {new StreamSource(new File(aIXSDFl))}));
// __ create XMLReader and register the content handler
  XMLReader XMLRdr = SAXFctry.newSAXParser().getXMLReader();
  XMLRdr.setContentHandler(new DefaultHandler2());
  return XMLRdr;
 }
}
~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~ ~

---------------------------------------------------------------------
To unsubscribe, e-mail: user-unsubscribe@commons.apache.org
For additional commands, e-mail: user-help@commons.apache.org


Mime
View raw message