Return-Path: X-Original-To: apmail-directory-commits-archive@www.apache.org Delivered-To: apmail-directory-commits-archive@www.apache.org Received: from mail.apache.org (hermes.apache.org [140.211.11.3]) by minotaur.apache.org (Postfix) with SMTP id 59A5F11456 for ; Sun, 3 Aug 2014 13:55:26 +0000 (UTC) Received: (qmail 61288 invoked by uid 500); 3 Aug 2014 13:55:26 -0000 Delivered-To: apmail-directory-commits-archive@directory.apache.org Received: (qmail 61241 invoked by uid 500); 3 Aug 2014 13:55:26 -0000 Mailing-List: contact commits-help@directory.apache.org; run by ezmlm Precedence: bulk List-Help: List-Unsubscribe: List-Post: List-Id: Reply-To: dev@directory.apache.org Delivered-To: mailing list commits@directory.apache.org Received: (qmail 61231 invoked by uid 99); 3 Aug 2014 13:55:26 -0000 Received: from nike.apache.org (HELO nike.apache.org) (192.87.106.230) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 03 Aug 2014 13:55:26 +0000 X-ASF-Spam-Status: No, hits=-2000.0 required=5.0 tests=ALL_TRUSTED X-Spam-Check-By: apache.org Received: from [140.211.11.4] (HELO eris.apache.org) (140.211.11.4) by apache.org (qpsmtpd/0.29) with ESMTP; Sun, 03 Aug 2014 13:55:21 +0000 Received: from eris.apache.org (localhost [127.0.0.1]) by eris.apache.org (Postfix) with ESMTP id 4AC9F2388868; Sun, 3 Aug 2014 13:54:54 +0000 (UTC) Content-Type: text/plain; charset="utf-8" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: svn commit: r1615405 [1/2] - in /directory/apacheds/trunk/bulkloader: ./ src/main/java/org/apache/directory/mavibot/btree/ src/test/java/org/apache/directory/mavibot/btree/ src/test/resources/ Date: Sun, 03 Aug 2014 13:54:47 -0000 To: commits@directory.apache.org From: elecharny@apache.org X-Mailer: svnmailer-1.0.9 Message-Id: <20140803135454.4AC9F2388868@eris.apache.org> X-Virus-Checked: Checked by ClamAV on apache.org Author: elecharny Date: Sun Aug 3 13:54:45 2014 New Revision: 1615405 URL: http://svn.apache.org/r1615405 Log: o Added references to the 
ldif-partition and server-config projects o Added some missing Javadoc o Modified the FastLdifReader to work on Windows o Initialized the CacheService which is necessary now that we are using EhCache in the partion as a replacement for LRUMap o We can inject the configuration file as a parameter, so that we can index the configured AT o Correctly initialized the test o Added a file test with 30 k entries o Added a BufferedReader that keep a track of the correct offset in the ldif file, even on windows Added: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/PositionBufferedReader.java directory/apacheds/trunk/bulkloader/src/test/resources/30k.ldif Modified: directory/apacheds/trunk/bulkloader/pom.xml directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/DnTuple.java directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/FastLdifReader.java directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilder.java directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/Option.java directory/apacheds/trunk/bulkloader/src/test/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilderTest.java Modified: directory/apacheds/trunk/bulkloader/pom.xml URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/pom.xml?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/pom.xml (original) +++ directory/apacheds/trunk/bulkloader/pom.xml Sun Aug 3 13:54:45 2014 @@ -34,6 +34,16 @@ org.apache.directory.server apacheds-mavibot-partition + + + org.apache.directory.server + apacheds-server-config + + + + org.apache.directory.server + apacheds-ldif-partition + Modified: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/DnTuple.java URL: 
http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/DnTuple.java?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/DnTuple.java (original) +++ directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/DnTuple.java Sun Aug 3 13:54:45 2014 @@ -34,12 +34,16 @@ import org.apache.directory.server.core. */ public class DnTuple implements Comparable { + /** The DN */ private Dn dn; + /** The DN length */ private int len; + /** The offset if the LDIF file */ private long offset; + /** The DN ID */ private String id; private DnTuple parent; Modified: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/FastLdifReader.java URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/FastLdifReader.java?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/FastLdifReader.java (original) +++ directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/FastLdifReader.java Sun Aug 3 13:54:45 2014 @@ -20,19 +20,21 @@ package org.apache.directory.mavibot.btree; +import java.io.BufferedReader; import java.io.File; +import java.io.FileNotFoundException; +import java.io.FileReader; +import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.NoSuchElementException; import org.apache.directory.api.i18n.I18n; import org.apache.directory.api.ldap.model.exception.LdapException; -import org.apache.directory.api.ldap.model.ldif.ChangeType; import org.apache.directory.api.ldap.model.ldif.LdapLdifException; import 
org.apache.directory.api.ldap.model.ldif.LdifEntry; import org.apache.directory.api.ldap.model.ldif.LdifReader; import org.apache.directory.api.ldap.model.name.Dn; -import org.apache.directory.api.util.Strings; +import org.apache.directory.server.core.api.DnFactory; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -47,7 +49,7 @@ import org.slf4j.LoggerFactory; */ /** no qualifier */ class FastLdifReader extends LdifReader { - + /** A logger for this class */ private static final Logger LOG = LoggerFactory.getLogger( FastLdifReader.class ); /** the pre-fetched DnTuple */ @@ -55,34 +57,80 @@ import org.slf4j.LoggerFactory; /** the next tuple */ private DnTuple nextTuple; + + /** The DnFactory */ + private DnFactory dnFactory; + /** * * Creates a new instance of FastLdifReader. * * @param file the LDIF file - * @throws LdapLdifException + * @throws LdapException + * @throws FileNotFoundException */ - public FastLdifReader( File file ) throws LdapLdifException + public FastLdifReader( File file, DnFactory dnFactory ) throws LdapException, FileNotFoundException { - super( file ); + super(); + reader = new PositionBufferedReader( new FileReader( file ) ); + this.dnFactory = dnFactory; validateDn = false; + + init(); } @Override - protected void init() throws LdapException + public void init() throws LdapException { lines = new ArrayList(); position = 0; version = DEFAULT_VERSION; containsChanges = false; containsEntries = false; + + // No need to validate the Dn while we are parsing it from the LDIF file + validateDn = false; // First get the version - if any - - version = parseVersion(); + fastParseVersion(); firstFetchedTuple = parseDnAlone(); } + + + /** + * Parse the version from the ldif input. 
+ * + * @return A number representing the version (default to 1) + * @throws LdapLdifException If the version is incorrect or if the input is incorrect + */ + private void fastParseVersion() throws LdapLdifException + { + // First, read a list of lines + fastReadLines(); + + if ( lines.size() == 0 ) + { + LOG.warn( "The ldif file is empty" ); + return; + } + + // get the first line + String line = lines.get( 0 ); + + // ::= "version:" + if ( line.startsWith( "version:" ) ) + { + // Ok, skip the line + position += "version:".length(); + + // We have found the version, just discard the line from the list + lines.remove( 0 ); + } + + return; + } @Override @@ -107,7 +155,8 @@ import org.slf4j.LoggerFactory; nextTuple = firstFetchedTuple; - readLines(); + // Read all the lines for one single entry + fastReadLines(); try { @@ -123,12 +172,14 @@ import org.slf4j.LoggerFactory; throw new NoSuchElementException( le.getMessage() ); } + //System.out.println( nextTuple ); LOG.debug( "next(): -- saving DnTuple {}\n", nextTuple ); return null; } catch ( LdapLdifException ne ) { + ne.printStackTrace(); LOG.error( I18n.err( I18n.ERR_12071 ) ); error = ne; return null; @@ -136,6 +187,9 @@ import org.slf4j.LoggerFactory; } + /** + * Get teh DN from an entry, ignoring the remaining data + */ private DnTuple parseDnAlone() throws LdapException { if ( ( lines == null ) || ( lines.size() == 0 ) ) @@ -151,145 +205,114 @@ import org.slf4j.LoggerFactory; String name = parseDn( line ); - Dn dn = new Dn( name ); - - DnTuple tuple = new DnTuple( dn, entryOffset, entryLen ); + Dn dn = dnFactory.create( name ); - // Ok, we have found a Dn - //LdifEntry entry = new LdifEntry( entryLen, entryOffset ); + DnTuple tuple = new DnTuple( dn, entryOffset, (int)(offset - entryOffset) ); - //entry.setDn( dn ); - - // We remove this dn from the lines - lines.remove( 0 ); + return tuple; + } - // Now, let's iterate through the other lines - Iterator iter = lines.iterator(); + protected String getLine() 
throws IOException + { + return ( ( PositionBufferedReader ) reader ).readLine(); + } - // This flag is used to distinguish between an entry and a change - int type = LDIF_ENTRY; - // The following boolean is used to check that a control is *not* - // found elswhere than just after the dn - boolean controlSeen = false; + /** + * Reads an entry in a ldif buffer, and returns the resulting lines, without + * comments, and unfolded. + * + * The lines represent *one* entry. + * + * @throws LdapLdifException If something went wrong + */ + private void fastReadLines() throws LdapLdifException + { + String line; + boolean insideComment = true; + boolean isFirstLine = true; - // We use this boolean to check that we do not have AttributeValues - // after a change operation - boolean changeTypeSeen = false; + lines.clear(); + entryOffset = offset; - ChangeType operation = ChangeType.Add; - String lowerLine; + StringBuffer sb = new StringBuffer(); - while ( iter.hasNext() ) + try { - lineNumber++; - - // Each line could start either with an OID, an attribute type, with - // "control:" or with "changetype:" - line = iter.next(); - lowerLine = Strings.toLowerCase( line ); - - // We have three cases : - // 1) The first line after the Dn is a "control:" - // 2) The first line after the Dn is a "changeType:" - // 3) The first line after the Dn is anything else - if ( lowerLine.startsWith( "control:" ) ) + while ( ( line = getLine() ) != null ) { - if ( containsEntries ) - { - LOG.error( I18n.err( I18n.ERR_12004_CHANGE_NOT_ALLOWED ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12005_NO_CHANGE ) ); - } - - containsChanges = true; - - if ( controlSeen ) - { - LOG.error( I18n.err( I18n.ERR_12050 ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12051 ) ); - } - - // Parse the control - // SKIP it - } - else if ( lowerLine.startsWith( "changetype:" ) ) - { - if ( containsEntries ) - { - LOG.error( I18n.err( I18n.ERR_12004_CHANGE_NOT_ALLOWED ) ); - throw new 
LdapLdifException( I18n.err( I18n.ERR_12005_NO_CHANGE ) ); - } - - containsChanges = true; - - if ( changeTypeSeen ) - { - LOG.error( I18n.err( I18n.ERR_12052 ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12053 ) ); - } - - // A change request - type = CHANGE; - controlSeen = true; + lineNumber++; - operation = parseChangeType( line ); - - if ( operation != ChangeType.Add ) - { - throw new IllegalArgumentException( "ChangeType " + operation + " is not allowed during bulk load" ); - } - // Parse the change operation in a separate function - // SKIP it - while ( iter.hasNext() ) + if ( line.length() == 0 ) { - iter.next(); - } + if ( isFirstLine ) + { + continue; + } + else + { + // The line is empty, we have read an entry + insideComment = false; + offset = ((PositionBufferedReader)reader).getFilePos(); - changeTypeSeen = true; - } - else if ( line.indexOf( ':' ) > 0 ) - { - if ( containsChanges ) - { - LOG.error( I18n.err( I18n.ERR_12004_CHANGE_NOT_ALLOWED ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12005_NO_CHANGE ) ); + break; + } } - containsEntries = true; - - if ( controlSeen || changeTypeSeen ) + // We will read the first line which is not a comment + switch ( line.charAt( 0 ) ) { - LOG.error( I18n.err( I18n.ERR_12054 ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12055 ) ); + case '#': + insideComment = true; + break; + + case ' ': + isFirstLine = false; + + if ( insideComment ) + { + continue; + } + else if ( sb.length() == 0 ) + { + LOG.error( I18n.err( I18n.ERR_12062_EMPTY_CONTINUATION_LINE ) ); + throw new LdapLdifException( I18n.err( I18n.ERR_12061_LDIF_PARSING_ERROR ) ); + } + else + { + sb.append( line.substring( 1 ) ); + } + + insideComment = false; + break; + + default: + isFirstLine = false; + + // We have found a new entry + // First, stores the previous one if any. 
+ if ( sb.length() != 0 ) + { + lines.add( sb.toString() ); + } + + sb = new StringBuffer( line ); + insideComment = false; + break; } - // SKIP it - //parseAttributeValue( entry, line, lowerLine ); - type = LDIF_ENTRY; + offset = ((PositionBufferedReader)reader).getFilePos(); } - else - { - // Invalid attribute Value - LOG.error( I18n.err( I18n.ERR_12056 ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12057_BAD_ATTRIBUTE ) ); - } - } - - if ( type == LDIF_ENTRY ) - { - LOG.debug( "Read an entry : {}", tuple ); } - else if ( type == CHANGE ) + catch ( IOException ioe ) { - //entry.setChangeType( operation ); - LOG.debug( "Read a modification : {}", tuple ); + throw new LdapLdifException( I18n.err( I18n.ERR_12063_ERROR_WHILE_READING_LDIF_LINE ), ioe ); } - else + + // Stores the current line if necessary. + if ( sb.length() != 0 ) { - LOG.error( I18n.err( I18n.ERR_12058_UNKNOWN_ENTRY_TYPE ) ); - throw new LdapLdifException( I18n.err( I18n.ERR_12059_UNKNOWN_ENTRY ) ); + lines.add( sb.toString() ); } - - return tuple; } } Modified: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilder.java URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilder.java?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilder.java (original) +++ directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilder.java Sun Aug 3 13:54:45 2014 @@ -52,6 +52,8 @@ import org.apache.directory.api.ldap.mod import org.apache.directory.api.ldap.model.entry.DefaultEntry; import org.apache.directory.api.ldap.model.entry.Entry; import org.apache.directory.api.ldap.model.entry.Value; +import 
org.apache.directory.api.ldap.model.exception.LdapException; +import org.apache.directory.api.ldap.model.exception.LdapInvalidDnException; import org.apache.directory.api.ldap.model.filter.ExprNode; import org.apache.directory.api.ldap.model.filter.PresenceNode; import org.apache.directory.api.ldap.model.ldif.LdifEntry; @@ -69,14 +71,20 @@ import org.apache.directory.api.util.Dat import org.apache.directory.mavibot.btree.serializer.LongSerializer; import org.apache.directory.mavibot.btree.serializer.StringSerializer; import org.apache.directory.mavibot.btree.util.Strings; +import org.apache.directory.server.config.ConfigPartitionReader; +import org.apache.directory.server.config.LdifConfigExtractor; +import org.apache.directory.server.config.beans.ConfigBean; import org.apache.directory.server.constants.ApacheSchemaConstants; import org.apache.directory.server.constants.ServerDNConstants; +import org.apache.directory.server.core.api.CacheService; import org.apache.directory.server.core.api.DnFactory; +import org.apache.directory.server.core.api.InstanceLayout; import org.apache.directory.server.core.api.filtering.EntryFilteringCursor; import org.apache.directory.server.core.api.interceptor.context.SearchOperationContext; import org.apache.directory.server.core.partition.impl.btree.mavibot.MavibotIndex; import org.apache.directory.server.core.partition.impl.btree.mavibot.MavibotPartition; import org.apache.directory.server.core.partition.impl.btree.mavibot.MavibotRdnIndex; +import org.apache.directory.server.core.partition.ldif.SingleFileLdifPartition; import org.apache.directory.server.core.shared.DefaultDnFactory; import org.apache.directory.server.xdbm.Index; import org.apache.directory.server.xdbm.ParentIdAndRdn; @@ -104,35 +112,94 @@ public class MavibotPartitionBuilder //private MavibotPartition partition; private SchemaManager schemaManager; + + /** The CacheService used internally by the partition */ + private CacheService cacheService; private CsnFactory 
csnFactory; private RandomAccessFile raf; + /** The LDIF file to extract */ private String ldifFile; + + /** The file containing teh configuration */ + private String configFile; private String masterTableName = "master"; private List indexAttributes = new ArrayList(); private int totalEntries = 0; + + /** The DN factory, used to check DN */ + private DnFactory dnFactory; + private static final Logger LOG = LoggerFactory.getLogger( MavibotPartitionBuilder.class ); - public MavibotPartitionBuilder( String ldifFile, String outputDir ) - { - this( ldifFile, outputDir, BTree.DEFAULT_PAGE_SIZE, 1 ); + /** + * Creates a new instance of MavibotPartitionBuilder. + * + * @param configFile The file containing the configuration partition + * @param ldifFile The ldif file to load + * @param outputDir The directory in which we want the resulting partition file to be stored + */ + public MavibotPartitionBuilder( String configFile, String ldifFile, String outputDir ) + { + this( configFile, ldifFile, outputDir, BTree.DEFAULT_PAGE_SIZE, 1 ); } - public MavibotPartitionBuilder( String ldifFile, String outputDir, int numKeysInNode, int rid ) + /** + * + * Creates a new instance of MavibotPartitionBuilder. + * + * @param configFile The file containing the configuration partition + * @param ldifFile The ldif file to load + * @param outputDir The directory in which we want the resulting partition file to be stored + * @param numKeysInNode The number of keys we can store in a node + * @param rid The replica ID + */ + public MavibotPartitionBuilder( String configFile, String ldifFile, String outputDir, int numKeysInNode, int rid ) { + this.configFile = configFile; this.ldifFile = ldifFile; this.outputDir = outputDir; this.numKeysInNode = numKeysInNode; this.csnFactory = new CsnFactory( rid ); } + + /** + * Load the configuration. 
This is a needed step, as we have to know which indexes + * have to be created + * + * @param workDir The directory in which the configuration partition will be found + * @return A ConfigBean instance, containing the configuration + * @throws LdapException If we can't read teh configuration + */ + private ConfigBean readConfig( String workDir ) throws LdapException + { + File configDir = new File( workDir, "config" ); // could be any directory, cause the config is now in a single file + + String configFile = LdifConfigExtractor.extractSingleFileConfig( configDir, "config.ldif", true ); + + SingleFileLdifPartition configPartition = new SingleFileLdifPartition( schemaManager, dnFactory ); + configPartition.setId( "config" ); + configPartition.setPartitionPath( new File( configFile ).toURI() ); + configPartition.setSuffixDn( new Dn( "ou=config" ) ); + configPartition.setSchemaManager( schemaManager ); + + configPartition.initialize(); + + ConfigPartitionReader cpReader = new ConfigPartitionReader( configPartition ); + + ConfigBean configBean = cpReader.readConfig( "ou=config" ); + + return configBean; + } + private BTree build( Iterator sortedTupleItr, String name ) throws Exception { @@ -411,7 +478,7 @@ public class MavibotPartitionBuilder raf = new RandomAccessFile( file, "r" ); - FastLdifReader reader = new FastLdifReader( file ); + FastLdifReader reader = new FastLdifReader( file, dnFactory ); Set sortedDnSet = new TreeSet(); @@ -419,12 +486,17 @@ public class MavibotPartitionBuilder { // FastLdifReader will always return NULL LdifEntry // call getDnTuple() after next() to get a DnTuple - LdifEntry entry = reader.next(); + reader.next(); DnTuple dt = reader.getDnTuple(); dt.getDn().apply( schemaManager ); sortedDnSet.add( dt ); + + if ( dt.getDn().toString().equals( "uid=user.29998,ou=People,dc=example,dc=com" ) ) + { + System.out.println( dt ); + } } reader.close(); @@ -725,22 +797,37 @@ public class MavibotPartitionBuilder } + /** + * Import a LDIF file and 
create a fully working Mavibot partition. + * TODO buildPartition. + * + */ public void buildPartition() { + // First, we load the Schema, as we will check the entries before + // injecting them into the partition try { System.out.println( "Loading schema using JarLdifSchemaLoader" ); JarLdifSchemaLoader loader = new JarLdifSchemaLoader(); schemaManager = new DefaultSchemaManager( loader ); schemaManager.loadAllEnabled(); + dnFactory = new DefaultDnFactory( schemaManager, null ); + cacheService = new CacheService(); + InstanceLayout instanceLayout = new InstanceLayout( outputDir ); + cacheService.initialize( instanceLayout ); + } catch ( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to initialize the schema manager", e ); return; } + // Now, read all the DNs, and sort them Set sortedDnSet = null; + try { long sortT0 = System.currentTimeMillis(); @@ -756,6 +843,7 @@ public class MavibotPartitionBuilder } catch ( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to parse the given LDIF file ", e ); return; } @@ -768,19 +856,19 @@ public class MavibotPartitionBuilder } MavibotPartition partition = null; + try { long partT0 = System.currentTimeMillis(); System.out.print( "Creating partition..." 
); - DnFactory dnFactory = new DefaultDnFactory( schemaManager, null ); - partition = new MavibotPartition( schemaManager, dnFactory ); partition.setId( "builder" ); partition.setSuffixDn( suffixDn ); File dir = new File( outputDir ); partition.setPartitionPath( dir.toURI() ); + partition.setCacheService( cacheService ); for( String atName : indexAttributes ) { @@ -799,6 +887,7 @@ public class MavibotPartitionBuilder } catch ( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to initialize the partition", e ); return; } @@ -813,6 +902,7 @@ public class MavibotPartitionBuilder } catch( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to build master table", e ); e.printStackTrace(); return; @@ -837,6 +927,7 @@ public class MavibotPartitionBuilder } catch( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to build the RDN index", e ); return; } @@ -891,6 +982,7 @@ public class MavibotPartitionBuilder } catch( Exception e ) { + e.printStackTrace(); LOG.warn( "Failed to build the presence index." 
); LOG.warn( "", e ); return; @@ -1232,9 +1324,11 @@ public class MavibotPartitionBuilder return args[position]; } + public static void main( String[] args ) throws Exception { String inFile = null; + String configDir = null; String outDirPath = null; int numKeysInNode = 16; int rid = 1; @@ -1253,36 +1347,40 @@ public class MavibotPartitionBuilder switch( opt ) { - case HELP: + case HELP : help(); System.exit( 0 ); break; - case INPUT_FILE: + case INPUT_FILE : inFile = getArgAt( ++i, opt, args ); break; - case OUT_DIR: + case OUT_DIR : outDirPath = getArgAt( ++i, opt, args ); break; - case CLEAN_OUT_DIR: + case CLEAN_OUT_DIR : cleanOutDir = true; break; - case VERIFY_MASTER_TABLE: + case VERIFY_MASTER_TABLE : verifyMasterTable = true; break; - case NUM_KEYS_PER_NODE: + case NUM_KEYS_PER_NODE : numKeysInNode = Integer.parseInt( getArgAt( ++i, opt, args ) ); break; - case DS_RID: + case DS_RID : rid = Integer.parseInt( getArgAt( ++i, opt, args ) ); break; + + case CONFIG_DIR : + configDir = getArgAt( ++i, opt, args ); + break; - case UNKNOWN: + case UNKNOWN : System.out.println( "Unknown option " + args[i] ); continue; } @@ -1315,7 +1413,7 @@ public class MavibotPartitionBuilder FileUtils.deleteDirectory( outDir ); } - MavibotPartitionBuilder builder = new MavibotPartitionBuilder( inFile, outDirPath, numKeysInNode, rid ); + MavibotPartitionBuilder builder = new MavibotPartitionBuilder( configDir, inFile, outDirPath, numKeysInNode, rid ); long start = System.currentTimeMillis(); Modified: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/Option.java URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/Option.java?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/Option.java (original) +++ 
directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/Option.java Sun Aug 3 13:54:45 2014 @@ -21,7 +21,19 @@ package org.apache.directory.mavibot.btr /** - * Command line options for bulk loader + * Command line options for bulk loader. + * + * Here are the various options : + *
    + *
  • -c : The configuration directory
  • + *
  • -clean : delete the content of the output directory
  • + *
  • -h : gives the list of possible options
  • + *
  • -i : the LDIF file to be loaded
  • + *
  • -n : the number of keys stored in each node
  • + *
  • -o : the directory where the resulting partition will be stored
  • + *
  • -rid : the replica ID
  • + *
  • -verify : check that we have loaded all the entries in the MasterTable
  • + *
* * @author Apache Directory Project */ @@ -39,6 +51,8 @@ public enum Option DS_RID("-rid", "(optional) The RID value to be used in the entryCSN values, default is 1"), + CONFIG_DIR("-c", "The configuration partition directory"), + VERIFY_MASTER_TABLE("-verify", "(optional) Verifies the master table by just browsing (entries are not verified)"), UNKNOWN(null, "Unknown Option"); @@ -105,6 +119,11 @@ public enum Option return NUM_KEYS_PER_NODE; } + if ( opt.equalsIgnoreCase( CONFIG_DIR.text ) ) + { + return CONFIG_DIR; + } + return UNKNOWN; } } Added: directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/PositionBufferedReader.java URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/PositionBufferedReader.java?rev=1615405&view=auto ============================================================================== --- directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/PositionBufferedReader.java (added) +++ directory/apacheds/trunk/bulkloader/src/main/java/org/apache/directory/mavibot/btree/PositionBufferedReader.java Sun Aug 3 13:54:45 2014 @@ -0,0 +1,438 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + * + */ + +package org.apache.directory.mavibot.btree; + + +import java.io.IOException; +import java.io.Reader; + + +/** + * Code taken from Harmony. + * + * This modified class keep a track of the current position in the file, + * whether the OS is linux/unix or Windows. + * + * Wraps an existing {@link Reader} and buffers the input. Expensive + * interaction with the underlying reader is minimized, since most (smaller) + * requests can be satisfied by accessing the buffer alone. The drawback is that + * some extra space is required to hold the buffer and that copying takes place + * when filling that buffer, but this is usually outweighed by the performance + * benefits. + * + *

A typical application pattern for the class looks like this:

+ * + *

+ * BufferedReader buf = new BufferedReader(new FileReader("file.java"));
+ * 
+ * + * @see BufferedWriter + * @since 1.1 + */ +public class PositionBufferedReader extends Reader +{ + + private Reader in; + + /** + * The characters that can be read and refilled in bulk. We maintain three + * indices into this buffer:
+     *     { X X X X X X X X X X X X - - }
+     *           ^     ^             ^
+     *           |     |             |
+     *         mark   pos           end
+ * Pos points to the next readable character. End is one greater than the + * last readable character. When {@code pos == end}, the buffer is empty and + * must be {@link #fillBuf() filled} before characters can be read. + * + *

Mark is the value pos will be set to on calls to {@link #reset}. Its + * value is in the range {@code [0...pos]}. If the mark is {@code -1}, the + * buffer cannot be reset. + * + *

MarkLimit limits the distance between the mark and the pos. When this + * limit is exceeded, {@link #reset} is permitted (but not required) to + * throw an exception. For shorter distances, {@link #reset} shall not throw + * (unless the reader is closed). + */ + private char[] buf; + + private int pos; + + private int end; + + private int mark = -1; + + private int markLimit = -1; + + /** The current position in the file */ + private long filePos; + + + /** + * Constructs a new BufferedReader on the Reader {@code in}. The + * buffer gets the default size (8 KB). + * + * @param in + * the Reader that is buffered. + */ + public PositionBufferedReader( Reader in ) + { + super( in ); + this.in = in; + buf = new char[8192]; + } + + + /** + * Closes this reader. This implementation closes the buffered source reader + * and releases the buffer. Nothing is done if this reader has already been + * closed. + * + * @throws IOException + * if an error occurs while closing this reader. + */ + @Override + public void close() throws IOException + { + synchronized ( lock ) + { + if ( !isClosed() ) + { + in.close(); + buf = null; + } + } + } + + + /** + * Populates the buffer with data. It is an error to call this method when + * the buffer still contains data; ie. if {@code pos < end}. + * + * @return the number of bytes read into the buffer, or -1 if the end of the + * source stream has been reached. + */ + private int fillBuf() throws IOException + { + // assert(pos == end); + + if ( mark == -1 || ( pos - mark >= markLimit ) ) + { + /* mark isn't set or has exceeded its limit. 
use the whole buffer */ + int result = in.read( buf, 0, buf.length ); + if ( result > 0 ) + { + mark = -1; + pos = 0; + end = result; + } + return result; + } + + if ( mark == 0 && markLimit > buf.length ) + { + /* the only way to make room when mark=0 is by growing the buffer */ + int newLength = buf.length * 2; + if ( newLength > markLimit ) + { + newLength = markLimit; + } + char[] newbuf = new char[newLength]; + System.arraycopy( buf, 0, newbuf, 0, buf.length ); + buf = newbuf; + } + else if ( mark > 0 ) + { + /* make room by shifting the buffered data to left mark positions */ + System.arraycopy( buf, mark, buf, 0, buf.length - mark ); + pos -= mark; + end -= mark; + mark = 0; + } + + /* Set the new position and mark position */ + int count = in.read( buf, pos, buf.length - pos ); + if ( count != -1 ) + { + end += count; + } + return count; + } + + + /** + * Indicates whether or not this reader is closed. + * + * @return {@code true} if this reader is closed, {@code false} + * otherwise. + */ + private boolean isClosed() + { + return buf == null; + } + + + /** + * Reads at most {@code length} characters from this reader and stores them + * at {@code offset} in the character array {@code buffer}. Returns the + * number of characters actually read or -1 if the end of the source reader + * has been reached. If all the buffered characters have been used, a mark + * has not been set and the requested number of characters is larger than + * this readers buffer size, BufferedReader bypasses the buffer and simply + * places the results directly into {@code buffer}. + * + * @param buffer + * the character array to store the characters read. + * @param offset + * the initial position in {@code buffer} to store the bytes read + * from this reader. + * @param length + * the maximum number of characters to read, must be + * non-negative. + * @return number of characters read or -1 if the end of the source reader + * has been reached. 
+ * @throws IndexOutOfBoundsException + * if {@code offset < 0} or {@code length < 0}, or if + * {@code offset + length} is greater than the size of + * {@code buffer}. + * @throws IOException + * if this reader is closed or some other I/O error occurs. + */ + @Override + public int read( char[] buffer, int offset, int length ) throws IOException + { + synchronized ( lock ) + { + if ( isClosed() ) + { + throw new IOException( "" ); //$NON-NLS-1$ + } + if ( offset < 0 || offset > buffer.length - length || length < 0 ) + { + throw new IndexOutOfBoundsException(); + } + int outstanding = length; + while ( outstanding > 0 ) + { + + /* + * If there are bytes in the buffer, grab those first. + */ + int available = end - pos; + if ( available > 0 ) + { + int count = available >= outstanding ? outstanding : available; + System.arraycopy( buf, pos, buffer, offset, count ); + pos += count; + offset += count; + outstanding -= count; + } + + /* + * Before attempting to read from the underlying stream, make + * sure we really, really want to. We won't bother if we're + * done, or if we've already got some bytes and reading from the + * underlying stream would block. + */ + if ( outstanding == 0 || ( outstanding < length && !in.ready() ) ) + { + break; + } + + // assert(pos == end); + + /* + * If we're unmarked and the requested size is greater than our + * buffer, read the bytes directly into the caller's buffer. We + * don't read into smaller buffers because that could result in + * a many reads. + */ + if ( ( mark == -1 || ( pos - mark >= markLimit ) ) + && outstanding >= buf.length ) + { + int count = in.read( buffer, offset, outstanding ); + if ( count > 0 ) + { + offset += count; + outstanding -= count; + mark = -1; + } + + break; // assume the source stream gave us all that it could + } + + if ( fillBuf() == -1 ) + { + break; // source is exhausted + } + } + + int count = length - outstanding; + return ( count > 0 || count == length ) ? 
count : -1; + } + } + + + /** + * Returns the next line of text available from this reader. A line is + * represented by zero or more characters followed by {@code '\n'}, + * {@code '\r'}, {@code "\r\n"} or the end of the reader. The string does + * not include the newline sequence. + * + * @return the contents of the line or {@code null} if no characters were + * read before the end of the reader has been reached. + * @throws IOException + * if this reader is closed or some other I/O error occurs. + */ + public String readLine() throws IOException + { + synchronized ( lock ) + { + if ( isClosed() ) + { + throw new IOException( "File closed, cannot read from it" ); + } + + /* has the underlying stream been exhausted? */ + if ( pos == end && fillBuf() == -1 ) + { + return null; + } + + for ( int charPos = pos; charPos < end; charPos++ ) + { + char ch = buf[charPos]; + + if ( ch > '\r' ) + { + filePos++; + continue; + } + + if ( ch == '\n' ) + { + String res = new String( buf, pos, charPos - pos ); + pos = charPos + 1; + filePos++; + + return res; + } + else if ( ch == '\r' ) + { + String res = new String( buf, pos, charPos - pos ); + filePos++; + pos = charPos + 1; + + if ( ( ( pos < end ) || ( fillBuf() != -1 ) ) + && ( buf[pos] == '\n' ) ) + { + filePos++; + pos++; + } + + return res; + } + } + + char eol = '\0'; + StringBuilder result = new StringBuilder( 80 ); + /* Typical Line Length */ + + result.append( buf, pos, end - pos ); + + while ( true ) + { + pos = end; + + /* Are there buffered characters available? */ + if ( eol == '\n' ) + { + return result.toString(); + } + + // attempt to fill buffer + if ( fillBuf() == -1 ) + { + // characters or null. + return result.length() > 0 || eol != '\0' + ? 
result.toString() + : null; + } + + filePos--; + + for ( int charPos = pos; charPos < end; charPos++ ) + { + char c = buf[charPos]; + filePos++; + + if ( eol == '\0' ) + { + if ( ( c == '\n' || c == '\r' ) ) + { + eol = c; + } + } + else if ( eol == '\r' && c == '\n' ) + { + if ( charPos > pos ) + { + result.append( buf, pos, charPos - pos - 1 ); + } + + pos = charPos + 1; + + return result.toString(); + } + else + { + if ( charPos > pos ) + { + result.append( buf, pos, charPos - pos - 1 ); + } + + pos = charPos; + + return result.toString(); + } + } + + if ( eol == '\0' ) + { + result.append( buf, pos, end - pos ); + } + else + { + result.append( buf, pos, end - pos - 1 ); + } + } + } + } + + + /** + * @return the filePos + */ + public long getFilePos() + { + return filePos; + } +} \ No newline at end of file Modified: directory/apacheds/trunk/bulkloader/src/test/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilderTest.java URL: http://svn.apache.org/viewvc/directory/apacheds/trunk/bulkloader/src/test/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilderTest.java?rev=1615405&r1=1615404&r2=1615405&view=diff ============================================================================== --- directory/apacheds/trunk/bulkloader/src/test/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilderTest.java (original) +++ directory/apacheds/trunk/bulkloader/src/test/java/org/apache/directory/mavibot/btree/MavibotPartitionBuilderTest.java Sun Aug 3 13:54:45 2014 @@ -24,10 +24,12 @@ import java.io.File; import java.io.InputStream; import org.apache.commons.io.FileUtils; +import org.apache.directory.server.config.LdifConfigExtractor; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; + import static org.junit.Assert.*; /** @@ -40,24 +42,34 @@ public class MavibotPartitionBuilderTest @Rule public TemporaryFolder folder = new TemporaryFolder(); - private File outDir; + /** The configuration 
file */ + String configFile; + private File outDir; + + private static File workDir = new File( System.getProperty( "java.io.tmpdir" ) + "/server-work" ); + @Before public void init() throws Exception { outDir = folder.newFolder( "MavibotPartitionBuilderTest" ); + workDir.mkdir(); + File configDir = new File( workDir, "config" ); // could be any directory, cause the config is now in a single file + + configFile = LdifConfigExtractor.extractSingleFileConfig( configDir, "config.ldif", true ); } @Test public void testBulkLoad() throws Exception { - - File file = new File( outDir, "builder-test.ldif" ); - InputStream in = MavibotPartitionBuilder.class.getClassLoader().getResourceAsStream( "builder-test.ldif" ); + //File file = new File( outDir, "builder-test.ldif" ); + File file = new File( outDir, "30k.ldif" ); + //InputStream in = MavibotPartitionBuilder.class.getClassLoader().getResourceAsStream( "builder-test.ldif" ); + InputStream in = MavibotPartitionBuilder.class.getClassLoader().getResourceAsStream( "30k.ldif" ); FileUtils.copyInputStreamToFile( in, file ); in.close(); - MavibotPartitionBuilder builder = new MavibotPartitionBuilder( file.getAbsolutePath(), outDir.getAbsolutePath() ); + MavibotPartitionBuilder builder = new MavibotPartitionBuilder( configFile, file.getAbsolutePath(), outDir.getAbsolutePath() ); builder.buildPartition();