incubator-blur-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Tim Williams <william...@gmail.com>
Subject Re: [04/13] git commit: Third round of updates.
Date Tue, 30 Aug 2016 11:05:36 GMT
Please be more considerate with your
commit messages... it's a lot of code to look through without having
any context besides "N round of updates."


On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org> wrote:
> Third round of updates.
>
>
> Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/commit/ea50630a
> Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/ea50630a
> Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/ea50630a
>
> Branch: refs/heads/master
> Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> Parents: 0141656
> Author: Aaron McCurry <amccurry@gmail.com>
> Authored: Sat May 7 13:11:54 2016 -0400
> Committer: Aaron McCurry <amccurry@gmail.com>
> Committed: Sat May 7 13:11:54 2016 -0400
>
> ----------------------------------------------------------------------
>  blur-indexer/pom.xml                            |  58 +++
>  blur-indexer/src/main/assemble/bin.xml          |  45 ++
>  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
>  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
>  .../blur/mapreduce/lib/update/FasterDriver.java | 486 +++++++++++++++++++
>  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
>  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
>  .../lib/update/LookupBuilderMapper.java         |  18 +
>  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
>  .../lib/update/MapperForExistingDataMod.java    |  46 ++
>  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
>  .../lib/update/MapperForNewDataMod.java         |  82 ++++
>  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
>  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
>  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
>  .../src/main/resources/blur-site.properties     |   1 +
>  .../src/main/resources/program-log4j.xml        |  29 ++
>  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
>  18 files changed, 2319 insertions(+)
> ----------------------------------------------------------------------
>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/pom.xml
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> new file mode 100644
> index 0000000..c7c1753
> --- /dev/null
> +++ b/blur-indexer/pom.xml
> @@ -0,0 +1,58 @@
> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
> +       <modelVersion>4.0.0</modelVersion>
> +       <groupId>org.apache.blur</groupId>
> +       <artifactId>blur-indexer</artifactId>
> +       <version>0.2.8</version>
> +       <name>blur-indexer</name>
> +       <packaging>jar</packaging>
> +
> +       <properties>
> +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-SNAPSHOT</blur.version>
> +       </properties>
> +       <dependencies>
> +               <dependency>
> +                       <groupId>org.apache.blur</groupId>
> +                       <artifactId>blur-mapred</artifactId>
> +                       <version>${blur.version}</version>
> +               </dependency>
> +               <dependency>
> +                       <groupId>junit</groupId>
> +                       <artifactId>junit</artifactId>
> +                       <version>4.9</version>
> +                       <scope>test</scope>
> +               </dependency>
> +       </dependencies>
> +
> +       <build>
> +               <pluginManagement>
> +                       <plugins>
> +                               <plugin>
> +                                       <groupId>org.apache.maven.plugins</groupId>
> +                                       <artifactId>maven-compiler-plugin</artifactId>
> +                                       <configuration>
> +                                               <source>1.8</source>
> +                                               <target>1.8</target>
> +                                       </configuration>
> +                               </plugin>
> +                       </plugins>
> +               </pluginManagement>
> +               <plugins>
> +                       <plugin>
> +                               <artifactId>maven-assembly-plugin</artifactId>
> +                               <configuration>
> +                                       <descriptor>src/main/assemble/bin.xml</descriptor>
> +                                       <finalName>blur-indexer-${project.version}</finalName>
> +                               </configuration>
> +                               <executions>
> +                                       <execution>
> +                                               <phase>package</phase>
> +                                               <goals>
> +                                                       <goal>single</goal>
> +                                               </goals>
> +                                       </execution>
> +                               </executions>
> +                       </plugin>
> +               </plugins>
> +       </build>
> +</project>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/assemble/bin.xml
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/assemble/bin.xml b/blur-indexer/src/main/assemble/bin.xml
> new file mode 100644
> index 0000000..5fddd56
> --- /dev/null
> +++ b/blur-indexer/src/main/assemble/bin.xml
> @@ -0,0 +1,45 @@
> +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
> +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> +           xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2
http://maven.apache.org/xsd/assembly-1.1.2.xsd">
> +  <formats>
> +    <format>tar.gz</format>
> +  </formats>
> +  <includeBaseDirectory>false</includeBaseDirectory>
> +
> +  <dependencySets>
> +    <dependencySet>
> +      <useProjectArtifact>true</useProjectArtifact>
> +      <outputDirectory>blur-indexer-${project.version}/lib</outputDirectory>
> +      <unpack>false</unpack>
> +      <includes>
> +        <include>org.apache.blur:blur-indexer</include>
> +        <include>org.apache.blur:*</include>
> +        <include>org.apache.zookeeper:zookeeper</include>
> +        <include>org.slf4j:slf4j-api</include>
> +        <include>org.slf4j:slf4j-log4j12</include>
> +        <include>org.json:json</include>
> +        <include>log4j:log4j</include>
> +        <include>com.yammer.metrics:*</include>
> +        <include>com.google.guava:guava</include>
> +        <include>org.apache.httpcomponents:*</include>
> +        <include>org.apache.lucene:*</include>
> +        <include>com.spatial4j:spatial4j</include>
> +        <include>commons-cli:commons-cli</include>
> +        <include>org.eclipse.jetty:*</include>
> +        <include>com.googlecode.concurrentlinkedhashmap:concurrentlinkedhashmap-lru</include>
> +        <include>jline:jline</include>
> +        <include>com.fasterxml.jackson.core:*</include>
> +      </includes>
> +    </dependencySet>
> +  </dependencySets>
> +
> +  <fileSets>
> +    <fileSet>
> +      <directory>${project.build.scriptSourceDirectory}</directory>
> +      <outputDirectory>blur-indexer-${project.version}</outputDirectory>
> +      <excludes>
> +        <exclude>**/.empty</exclude>
> +      </excludes>
> +    </fileSet>
> +  </fileSets>
> +</assembly>
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> new file mode 100644
> index 0000000..a9caabb
> --- /dev/null
> +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> @@ -0,0 +1,17 @@
> +package org.apache.blur.mapreduce.lib.update;
> +
> +public enum BlurIndexCounter {
> +
> +  NEW_RECORDS,
> +  ROW_IDS_FROM_INDEX,
> +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> +  ROW_IDS_FROM_NEW_DATA,
> +
> +  INPUT_FORMAT_MAPPER,
> +  INPUT_FORMAT_EXISTING_RECORDS,
> +
> +  LOOKUP_MAPPER,
> +  LOOKUP_MAPPER_EXISTING_RECORDS,
> +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> +
> +}
>
> http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> ----------------------------------------------------------------------
> diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> new file mode 100644
> index 0000000..d44adf1
> --- /dev/null
> +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> @@ -0,0 +1,362 @@
> +package org.apache.blur.mapreduce.lib.update;
> +
> +import java.io.ByteArrayInputStream;
> +import java.io.ByteArrayOutputStream;
> +import java.io.IOException;
> +import java.io.InputStream;
> +import java.net.URL;
> +import java.util.HashMap;
> +import java.util.HashSet;
> +import java.util.List;
> +import java.util.Map;
> +import java.util.Map.Entry;
> +import java.util.Set;
> +import java.util.UUID;
> +import java.util.concurrent.Callable;
> +import java.util.concurrent.ExecutionException;
> +import java.util.concurrent.ExecutorService;
> +import java.util.concurrent.Executors;
> +import java.util.concurrent.Future;
> +import java.util.concurrent.TimeUnit;
> +import java.util.concurrent.atomic.AtomicBoolean;
> +
> +import org.apache.blur.log.Log;
> +import org.apache.blur.log.LogFactory;
> +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> +import org.apache.blur.thrift.BlurClient;
> +import org.apache.blur.thrift.generated.Blur.Iface;
> +import org.apache.blur.thrift.generated.BlurException;
> +import org.apache.blur.thrift.generated.TableDescriptor;
> +import org.apache.blur.thrift.generated.TableStats;
> +import org.apache.blur.utils.BlurConstants;
> +import org.apache.commons.io.IOUtils;
> +import org.apache.hadoop.conf.Configuration;
> +import org.apache.hadoop.conf.Configured;
> +import org.apache.hadoop.fs.FSDataInputStream;
> +import org.apache.hadoop.fs.FileStatus;
> +import org.apache.hadoop.fs.FileSystem;
> +import org.apache.hadoop.fs.Path;
> +import org.apache.hadoop.fs.permission.FsAction;
> +import org.apache.hadoop.mapreduce.Cluster;
> +import org.apache.hadoop.mapreduce.Job;
> +import org.apache.hadoop.mapreduce.JobID;
> +import org.apache.hadoop.mapreduce.JobStatus;
> +import org.apache.hadoop.util.Tool;
> +import org.apache.hadoop.util.ToolRunner;
> +import org.apache.hadoop.yarn.exceptions.YarnException;
> +import org.apache.log4j.LogManager;
> +import org.apache.log4j.xml.DOMConfigurator;
> +
> +public class ClusterDriver extends Configured implements Tool {
> +
> +  private static final String BLUR_ENV = "blur.env";
> +  private static final Log LOG = LogFactory.getLog(ClusterDriver.class);
> +  private static final String _SEP = "_";
> +  private static final String IMPORT = "import";
> +
> +  public static void main(String[] args) throws Exception {
> +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> +    System.out.println("Log file path [" + logFilePath + "]");
> +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> +    if (url != null) {
> +      LOG.info("Reseting log4j config from classpath resource [{0}]", url);
> +      LogManager.resetConfiguration();
> +      DOMConfigurator.configure(url);
> +    }
> +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(), args);

Not sure what this thing does yet, but it seems we should validate
those args since they're accessed blindly in run...

--tim

Mime
View raw message