incubator-blur-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Tim Williams <william...@gmail.com>
Subject Re: [03/13] git commit: Third round of updates.
Date Tue, 30 Aug 2016 13:10:31 GMT
No worries, just a friendly reminder:)  If you get time, I think it'd
be helpful to add a couple sentences about any new stuff/big changes...
seems like there's a new project for example...

Thanks,
--tim


On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <amccurry@gmail.com> wrote:
> I apologize for the big commits without proper messaging.  It was difficult
> to remember the changes and the original commit messages were lost due to an
> offline git repo (which is no longer in use).  I only had the diff between
> the original git repo and everything after the changes.  Plus the diff
> didn't apply cleanly so that's why I broke it up into different sections.
>
> I suppose I should have broken up the changes manually out of the diff and
> applied them separately and recreated all the commit messages but I didn't
> have the time to work through all of them.  Sorry.
>
> Aaron
>
>
> On Tuesday, August 30, 2016, Tim Williams <williamstw@gmail.com> wrote:
>
>> Please be more considerate with your
>> commit messages... it's a lot of code to look through without having
>> any context besides "N round of updates."
>>
>>
>> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
>> wrote:
>> > Third round of updates.
>> >
>> >
>> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
>> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
>> commit/ea50630a
>> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
>> ea50630a
>> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
>> ea50630a
>> >
>> > Branch: refs/heads/master
>> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
>> > Parents: 0141656
>> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
>> > Authored: Sat May 7 13:11:54 2016 -0400
>> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
>> > Committed: Sat May 7 13:11:54 2016 -0400
>> >
>> > ----------------------------------------------------------------------
>> >  blur-indexer/pom.xml                            |  58 +++
>> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
>> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
>> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
>> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
>> +++++++++++++++++++
>> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
>> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
>> >  .../lib/update/LookupBuilderMapper.java         |  18 +
>> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
>> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
>> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
>> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
>> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
>> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
>> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
>> >  .../src/main/resources/blur-site.properties     |   1 +
>> >  .../src/main/resources/program-log4j.xml        |  29 ++
>> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
>> >  18 files changed, 2319 insertions(+)
>> > ----------------------------------------------------------------------
>> >
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/pom.xml
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
>> > new file mode 100644
>> > index 0000000..c7c1753
>> > --- /dev/null
>> > +++ b/blur-indexer/pom.xml
>> > @@ -0,0 +1,58 @@
>> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
>> http://www.w3.org/2001/XMLSchema-instance"
>> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
>> http://maven.apache.org/xsd/maven-4.0.0.xsd">
>> > +       <modelVersion>4.0.0</modelVersion>
>> > +       <groupId>org.apache.blur</groupId>
>> > +       <artifactId>blur-indexer</artifactId>
>> > +       <version>0.2.8</version>
>> > +       <name>blur-indexer</name>
>> > +       <packaging>jar</packaging>
>> > +
>> > +       <properties>
>> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
>> SNAPSHOT</blur.version>
>> > +       </properties>
>> > +       <dependencies>
>> > +               <dependency>
>> > +                       <groupId>org.apache.blur</groupId>
>> > +                       <artifactId>blur-mapred</artifactId>
>> > +                       <version>${blur.version}</version>
>> > +               </dependency>
>> > +               <dependency>
>> > +                       <groupId>junit</groupId>
>> > +                       <artifactId>junit</artifactId>
>> > +                       <version>4.9</version>
>> > +                       <scope>test</scope>
>> > +               </dependency>
>> > +       </dependencies>
>> > +
>> > +       <build>
>> > +               <pluginManagement>
>> > +                       <plugins>
>> > +                               <plugin>
>> > +                                       <groupId>org.apache.maven.
>> plugins</groupId>
>> > +                                       <artifactId>maven-compiler-
>> plugin</artifactId>
>> > +                                       <configuration>
>> > +                                               <source>1.8</source>
>> > +                                               <target>1.8</target>
>> > +                                       </configuration>
>> > +                               </plugin>
>> > +                       </plugins>
>> > +               </pluginManagement>
>> > +               <plugins>
>> > +                       <plugin>
>> > +                               <artifactId>maven-assembly-
>> plugin</artifactId>
>> > +                               <configuration>
>> > +                                       <descriptor>src/main/assemble/
>> bin.xml</descriptor>
>> > +                                       <finalName>blur-indexer-${
>> project.version}</finalName>
>> > +                               </configuration>
>> > +                               <executions>
>> > +                                       <execution>
>> > +                                               <phase>package</phase>
>> > +                                               <goals>
>> > +
>>  <goal>single</goal>
>> > +                                               </goals>
>> > +                                       </execution>
>> > +                               </executions>
>> > +                       </plugin>
>> > +               </plugins>
>> > +       </build>
>> > +</project>
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/assemble/bin.xml
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/assemble/bin.xml
>> b/blur-indexer/src/main/assemble/bin.xml
>> > new file mode 100644
>> > index 0000000..5fddd56
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/assemble/bin.xml
>> > @@ -0,0 +1,45 @@
>> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
>> plugin/assembly/1.1.2"
>> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
>> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
>> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
>> assembly-1.1.2.xsd">
>> > +  <formats>
>> > +    <format>tar.gz</format>
>> > +  </formats>
>> > +  <includeBaseDirectory>false</includeBaseDirectory>
>> > +
>> > +  <dependencySets>
>> > +    <dependencySet>
>> > +      <useProjectArtifact>true</useProjectArtifact>
>> > +      <outputDirectory>blur-indexer-${project.version}/lib</
>> outputDirectory>
>> > +      <unpack>false</unpack>
>> > +      <includes>
>> > +        <include>org.apache.blur:blur-indexer</include>
>> > +        <include>org.apache.blur:*</include>
>> > +        <include>org.apache.zookeeper:zookeeper</include>
>> > +        <include>org.slf4j:slf4j-api</include>
>> > +        <include>org.slf4j:slf4j-log4j12</include>
>> > +        <include>org.json:json</include>
>> > +        <include>log4j:log4j</include>
>> > +        <include>com.yammer.metrics:*</include>
>> > +        <include>com.google.guava:guava</include>
>> > +        <include>org.apache.httpcomponents:*</include>
>> > +        <include>org.apache.lucene:*</include>
>> > +        <include>com.spatial4j:spatial4j</include>
>> > +        <include>commons-cli:commons-cli</include>
>> > +        <include>org.eclipse.jetty:*</include>
>> > +        <include>com.googlecode.concurrentlinkedhashmap:
>> concurrentlinkedhashmap-lru</include>
>> > +        <include>jline:jline</include>
>> > +        <include>com.fasterxml.jackson.core:*</include>
>> > +      </includes>
>> > +    </dependencySet>
>> > +  </dependencySets>
>> > +
>> > +  <fileSets>
>> > +    <fileSet>
>> > +      <directory>${project.build.scriptSourceDirectory}</directory>
>> > +      <outputDirectory>blur-indexer-${project.version}</
>> outputDirectory>
>> > +      <excludes>
>> > +        <exclude>**/.empty</exclude>
>> > +      </excludes>
>> > +    </fileSet>
>> > +  </fileSets>
>> > +</assembly>
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
>> BlurIndexCounter.java
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
>> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/BlurIndexCounter.java
>> > new file mode 100644
>> > index 0000000..a9caabb
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/BlurIndexCounter.java
>> > @@ -0,0 +1,17 @@
>> > +package org.apache.blur.mapreduce.lib.update;
>> > +
>> > +public enum BlurIndexCounter {
>> > +
>> > +  NEW_RECORDS,
>> > +  ROW_IDS_FROM_INDEX,
>> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
>> > +  ROW_IDS_FROM_NEW_DATA,
>> > +
>> > +  INPUT_FORMAT_MAPPER,
>> > +  INPUT_FORMAT_EXISTING_RECORDS,
>> > +
>> > +  LOOKUP_MAPPER,
>> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
>> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
>> > +
>> > +}
>> >
>> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
>> ea50630a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/
>> ClusterDriver.java
>> > ----------------------------------------------------------------------
>> > diff --git a/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/update/ClusterDriver.java
>> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/ClusterDriver.java
>> > new file mode 100644
>> > index 0000000..d44adf1
>> > --- /dev/null
>> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
>> update/ClusterDriver.java
>> > @@ -0,0 +1,362 @@
>> > +package org.apache.blur.mapreduce.lib.update;
>> > +
>> > +import java.io.ByteArrayInputStream;
>> > +import java.io.ByteArrayOutputStream;
>> > +import java.io.IOException;
>> > +import java.io.InputStream;
>> > +import java.net.URL;
>> > +import java.util.HashMap;
>> > +import java.util.HashSet;
>> > +import java.util.List;
>> > +import java.util.Map;
>> > +import java.util.Map.Entry;
>> > +import java.util.Set;
>> > +import java.util.UUID;
>> > +import java.util.concurrent.Callable;
>> > +import java.util.concurrent.ExecutionException;
>> > +import java.util.concurrent.ExecutorService;
>> > +import java.util.concurrent.Executors;
>> > +import java.util.concurrent.Future;
>> > +import java.util.concurrent.TimeUnit;
>> > +import java.util.concurrent.atomic.AtomicBoolean;
>> > +
>> > +import org.apache.blur.log.Log;
>> > +import org.apache.blur.log.LogFactory;
>> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
>> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
>> > +import org.apache.blur.thrift.BlurClient;
>> > +import org.apache.blur.thrift.generated.Blur.Iface;
>> > +import org.apache.blur.thrift.generated.BlurException;
>> > +import org.apache.blur.thrift.generated.TableDescriptor;
>> > +import org.apache.blur.thrift.generated.TableStats;
>> > +import org.apache.blur.utils.BlurConstants;
>> > +import org.apache.commons.io.IOUtils;
>> > +import org.apache.hadoop.conf.Configuration;
>> > +import org.apache.hadoop.conf.Configured;
>> > +import org.apache.hadoop.fs.FSDataInputStream;
>> > +import org.apache.hadoop.fs.FileStatus;
>> > +import org.apache.hadoop.fs.FileSystem;
>> > +import org.apache.hadoop.fs.Path;
>> > +import org.apache.hadoop.fs.permission.FsAction;
>> > +import org.apache.hadoop.mapreduce.Cluster;
>> > +import org.apache.hadoop.mapreduce.Job;
>> > +import org.apache.hadoop.mapreduce.JobID;
>> > +import org.apache.hadoop.mapreduce.JobStatus;
>> > +import org.apache.hadoop.util.Tool;
>> > +import org.apache.hadoop.util.ToolRunner;
>> > +import org.apache.hadoop.yarn.exceptions.YarnException;
>> > +import org.apache.log4j.LogManager;
>> > +import org.apache.log4j.xml.DOMConfigurator;
>> > +
>> > +public class ClusterDriver extends Configured implements Tool {
>> > +
>> > +  private static final String BLUR_ENV = "blur.env";
>> > +  private static final Log LOG = LogFactory.getLog(
>> ClusterDriver.class);
>> > +  private static final String _SEP = "_";
>> > +  private static final String IMPORT = "import";
>> > +
>> > +  public static void main(String[] args) throws Exception {
>> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
>> > +    System.out.println("Log file path [" + logFilePath + "]");
>> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
>> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
>> > +    if (url != null) {
>> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
>> url);
>> > +      LogManager.resetConfiguration();
>> > +      DOMConfigurator.configure(url);
>> > +    }
>> > +    int res = ToolRunner.run(new Configuration(), new ClusterDriver(),
>> args);
>>
>> Not sure what this thing does yet but it seems we should validate
>> those args since they're accessed blindly in run...
>>
>> --tim
>>

Mime
View raw message