incubator-blur-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Aaron McCurry <amccu...@gmail.com>
Subject Re: [03/13] git commit: Third round of updates.
Date Tue, 30 Aug 2016 11:49:04 GMT
I apologize for the big commits without proper messaging.  It was difficult
to remember the changes and the original commit messages were lost due to an
offline git repo (which is no longer in use).  I only had the diff between
the original git repo and everything after the changes.  Plus the diff
didn't apply cleanly so that's why I broke it up into different sections.

I suppose I should have broken up the changes manually out of the diff and
applied them separately and recreated all the commit messages but I didn't
have the time to work through all of them.  Sorry.

Aaron


On Tuesday, August 30, 2016, Tim Williams <williamstw@gmail.com> wrote:

> Please be more considerate with your
> commit messages... it's a lot of code to look through without having
> any context besides "N round of updates."
>
>
> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
> wrote:
> > Third round of updates.
> >
> >
> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
> commit/ea50630a
> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
> ea50630a
> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
> ea50630a
> >
> > Branch: refs/heads/master
> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> > Parents: 0141656
> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> > Authored: Sat May 7 13:11:54 2016 -0400
> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> > Committed: Sat May 7 13:11:54 2016 -0400
> >
> > ----------------------------------------------------------------------
> >  blur-indexer/pom.xml                            |  58 +++
> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
> +++++++++++++++++++
> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
> >  .../lib/update/LookupBuilderMapper.java         |  18 +
> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
> >  .../src/main/resources/blur-site.properties     |   1 +
> >  .../src/main/resources/program-log4j.xml        |  29 ++
> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
> >  18 files changed, 2319 insertions(+)
> > ----------------------------------------------------------------------
> >
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/pom.xml
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> > new file mode 100644
> > index 0000000..c7c1753
> > --- /dev/null
> > +++ b/blur-indexer/pom.xml
> > @@ -0,0 +1,58 @@
> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
> http://www.w3.org/2001/XMLSchema-instance"
> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> http://maven.apache.org/xsd/maven-4.0.0.xsd">
> > +       <modelVersion>4.0.0</modelVersion>
> > +       <groupId>org.apache.blur</groupId>
> > +       <artifactId>blur-indexer</artifactId>
> > +       <version>0.2.8</version>
> > +       <name>blur-indexer</name>
> > +       <packaging>jar</packaging>
> > +
> > +       <properties>
> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
> SNAPSHOT</blur.version>
> > +       </properties>
> > +       <dependencies>
> > +               <dependency>
> > +                       <groupId>org.apache.blur</groupId>
> > +                       <artifactId>blur-mapred</artifactId>
> > +                       <version>${blur.version}</version>
> > +               </dependency>
> > +               <dependency>
> > +                       <groupId>junit</groupId>
> > +                       <artifactId>junit</artifactId>
> > +                       <version>4.9</version>
> > +                       <scope>test</scope>
> > +               </dependency>
> > +       </dependencies>
> > +
> > +       <build>
> > +               <pluginManagement>
> > +                       <plugins>
> > +                               <plugin>
> > +                                       <groupId>org.apache.maven.
> plugins</groupId>
> > +                                       <artifactId>maven-compiler-
> plugin</artifactId>
> > +                                       <configuration>
> > +                                               <source>1.8</source>
> > +                                               <target>1.8</target>
> > +                                       </configuration>
> > +                               </plugin>
> > +                       </plugins>
> > +               </pluginManagement>
> > +               <plugins>
> > +                       <plugin>
> > +                               <artifactId>maven-assembly-
> plugin</artifactId>
> > +                               <configuration>
> > +                                       <descriptor>src/main/assemble/
> bin.xml</descriptor>
> > +                                       <finalName>blur-indexer-${
> project.version}</finalName>
> > +                               </configuration>
> > +                               <executions>
> > +                                       <execution>
> > +                                               <phase>package</phase>
> > +                                               <goals>
> > +
>  <goal>single</goal>
> > +                                               </goals>
> > +                                       </execution>
> > +                               </executions>
> > +                       </plugin>
> > +               </plugins>
> > +       </build>
> > +</project>
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/assemble/bin.xml
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/assemble/bin.xml
> b/blur-indexer/src/main/assemble/bin.xml
> > new file mode 100644
> > index 0000000..5fddd56
> > --- /dev/null
> > +++ b/blur-indexer/src/main/assemble/bin.xml
> > @@ -0,0 +1,45 @@
> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
> plugin/assembly/1.1.2"
> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
> assembly-1.1.2.xsd">
> > +  <formats>
> > +    <format>tar.gz</format>
> > +  </formats>
> > +  <includeBaseDirectory>false</includeBaseDirectory>
> > +
> > +  <dependencySets>
> > +    <dependencySet>
> > +      <useProjectArtifact>true</useProjectArtifact>
> > +      <outputDirectory>blur-indexer-${project.version}/lib</
> outputDirectory>
> > +      <unpack>false</unpack>
> > +      <includes>
> > +        <include>org.apache.blur:blur-indexer</include>
> > +        <include>org.apache.blur:*</include>
> > +        <include>org.apache.zookeeper:zookeeper</include>
> > +        <include>org.slf4j:slf4j-api</include>
> > +        <include>org.slf4j:slf4j-log4j12</include>
> > +        <include>org.json:json</include>
> > +        <include>log4j:log4j</include>
> > +        <include>com.yammer.metrics:*</include>
> > +        <include>com.google.guava:guava</include>
> > +        <include>org.apache.httpcomponents:*</include>
> > +        <include>org.apache.lucene:*</include>
> > +        <include>com.spatial4j:spatial4j</include>
> > +        <include>commons-cli:commons-cli</include>
> > +        <include>org.eclipse.jetty:*</include>
> > +        <include>com.googlecode.concurrentlinkedhashmap:
> concurrentlinkedhashmap-lru</include>
> > +        <include>jline:jline</include>
> > +        <include>com.fasterxml.jackson.core:*</include>
> > +      </includes>
> > +    </dependencySet>
> > +  </dependencySets>
> > +
> > +  <fileSets>
> > +    <fileSet>
> > +      <directory>${project.build.scriptSourceDirectory}</directory>
> > +      <outputDirectory>blur-indexer-${project.version}</
> outputDirectory>
> > +      <excludes>
> > +        <exclude>**/.empty</exclude>
> > +      </excludes>
> > +    </fileSet>
> > +  </fileSets>
> > +</assembly>
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
> BlurIndexCounter.java
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/BlurIndexCounter.java
> > new file mode 100644
> > index 0000000..a9caabb
> > --- /dev/null
> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/BlurIndexCounter.java
> > @@ -0,0 +1,17 @@
> > +package org.apache.blur.mapreduce.lib.update;
> > +
> > +public enum BlurIndexCounter {
> > +
> > +  NEW_RECORDS,
> > +  ROW_IDS_FROM_INDEX,
> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> > +  ROW_IDS_FROM_NEW_DATA,
> > +
> > +  INPUT_FORMAT_MAPPER,
> > +  INPUT_FORMAT_EXISTING_RECORDS,
> > +
> > +  LOOKUP_MAPPER,
> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> > +
> > +}
> >
> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
> ClusterDriver.java
> > ----------------------------------------------------------------------
> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/ClusterDriver.java
> > new file mode 100644
> > index 0000000..d44adf1
> > --- /dev/null
> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> update/ClusterDriver.java
> > @@ -0,0 +1,362 @@
> > +package org.apache.blur.mapreduce.lib.update;
> > +
> > +import java.io.ByteArrayInputStream;
> > +import java.io.ByteArrayOutputStream;
> > +import java.io.IOException;
> > +import java.io.InputStream;
> > +import java.net.URL;
> > +import java.util.HashMap;
> > +import java.util.HashSet;
> > +import java.util.List;
> > +import java.util.Map;
> > +import java.util.Map.Entry;
> > +import java.util.Set;
> > +import java.util.UUID;
> > +import java.util.concurrent.Callable;
> > +import java.util.concurrent.ExecutionException;
> > +import java.util.concurrent.ExecutorService;
> > +import java.util.concurrent.Executors;
> > +import java.util.concurrent.Future;
> > +import java.util.concurrent.TimeUnit;
> > +import java.util.concurrent.atomic.AtomicBoolean;
> > +
> > +import org.apache.blur.log.Log;
> > +import org.apache.blur.log.LogFactory;
> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> > +import org.apache.blur.thrift.BlurClient;
> > +import org.apache.blur.thrift.generated.Blur.Iface;
> > +import org.apache.blur.thrift.generated.BlurException;
> > +import org.apache.blur.thrift.generated.TableDescriptor;
> > +import org.apache.blur.thrift.generated.TableStats;
> > +import org.apache.blur.utils.BlurConstants;
> > +import org.apache.commons.io.IOUtils;
> > +import org.apache.hadoop.conf.Configuration;
> > +import org.apache.hadoop.conf.Configured;
> > +import org.apache.hadoop.fs.FSDataInputStream;
> > +import org.apache.hadoop.fs.FileStatus;
> > +import org.apache.hadoop.fs.FileSystem;
> > +import org.apache.hadoop.fs.Path;
> > +import org.apache.hadoop.fs.permission.FsAction;
> > +import org.apache.hadoop.mapreduce.Cluster;
> > +import org.apache.hadoop.mapreduce.Job;
> > +import org.apache.hadoop.mapreduce.JobID;
> > +import org.apache.hadoop.mapreduce.JobStatus;
> > +import org.apache.hadoop.util.Tool;
> > +import org.apache.hadoop.util.ToolRunner;
> > +import org.apache.hadoop.yarn.exceptions.YarnException;
> > +import org.apache.log4j.LogManager;
> > +import org.apache.log4j.xml.DOMConfigurator;
> > +
> > +public class ClusterDriver extends Configured implements Tool {
> > +
> > +  private static final String BLUR_ENV = "blur.env";
> > +  private static final Log LOG = LogFactory.getLog(
> ClusterDriver.class);
> > +  private static final String _SEP = "_";
> > +  private static final String IMPORT = "import";
> > +
> > +  public static void main(String[] args) throws Exception {
> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> > +    System.out.println("Log file path [" + logFilePath + "]");
> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> > +    if (url != null) {
> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
> url);
> > +      LogManager.resetConfiguration();
> > +      DOMConfigurator.configure(url);
> > +    }
> > +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(),
> args);
>
> Not sure what this thing does yet but it seems we should validate
> those args since they're accessed blindly in run...
>
> --tim
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message