incubator-blur-dev mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From Aaron McCurry <amccu...@gmail.com>
Subject Re: [03/13] git commit: Third round of updates.
Date Tue, 30 Aug 2016 14:08:48 GMT
Will do.  :-)

On Tue, Aug 30, 2016 at 9:10 AM, Tim Williams <williamstw@gmail.com> wrote:

> No worries, just a friendly reminder:)  If you get time, I think it'd
> be helpful to add a couple sentences about any new stuff/big changes...
> seems like there's a new project for example...
>
> Thanks,
> --tim
>
>
> On Tue, Aug 30, 2016 at 7:49 AM, Aaron McCurry <amccurry@gmail.com> wrote:
> > I apologize for the big commits without proper messaging.  It was
> difficult
> > to remember the changes and the original commit messages were lost due to
> an
> > offline git repo (which is no longer in use).  I only had the diff
> between
> > the original git repo and everything after the changes.  Plus the diff
> > didn't apply cleanly so that's why I broke it up in to different
> sections.
> >
> > I suppose I should have broken up the changes manually out of the diff and
> > applied them separately and recreated all the commit messages but I
> didn't
> > have the time to work through all of them.  Sorry.
> >
> > Aaron
> >
> >
> > On Tuesday, August 30, 2016, Tim Williams <williamstw@gmail.com> wrote:
> >
> >> Please be more considerate with your
> >> commit messages... it's a lot of code to look through without having
> >> any context besides "N round of updates."
> >>
> >>
> >> On Mon, Aug 29, 2016 at 9:57 PM,  <amccurry@apache.org <javascript:;>>
> >> wrote:
> >> > Third round of updates.
> >> >
> >> >
> >> > Project: http://git-wip-us.apache.org/repos/asf/incubator-blur/repo
> >> > Commit: http://git-wip-us.apache.org/repos/asf/incubator-blur/
> >> commit/ea50630a
> >> > Tree: http://git-wip-us.apache.org/repos/asf/incubator-blur/tree/
> >> ea50630a
> >> > Diff: http://git-wip-us.apache.org/repos/asf/incubator-blur/diff/
> >> ea50630a
> >> >
> >> > Branch: refs/heads/master
> >> > Commit: ea50630a38d67675a61a916b144f3c0ce85d7f7a
> >> > Parents: 0141656
> >> > Author: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> >> > Authored: Sat May 7 13:11:54 2016 -0400
> >> > Committer: Aaron McCurry <amccurry@gmail.com <javascript:;>>
> >> > Committed: Sat May 7 13:11:54 2016 -0400
> >> >
> >> > ------------------------------------------------------------
> ----------
> >> >  blur-indexer/pom.xml                            |  58 +++
> >> >  blur-indexer/src/main/assemble/bin.xml          |  45 ++
> >> >  .../mapreduce/lib/update/BlurIndexCounter.java  |  17 +
> >> >  .../mapreduce/lib/update/ClusterDriver.java     | 362 ++++++++++++++
> >> >  .../blur/mapreduce/lib/update/FasterDriver.java | 486
> >> +++++++++++++++++++
> >> >  .../update/HdfsConfigurationNamespaceMerge.java | 115 +++++
> >> >  .../lib/update/InputSplitPruneUtil.java         | 133 +++++
> >> >  .../lib/update/LookupBuilderMapper.java         |  18 +
> >> >  .../lib/update/LookupBuilderReducer.java        | 165 +++++++
> >> >  .../lib/update/MapperForExistingDataMod.java    |  46 ++
> >> >  .../MapperForExistingDataWithIndexLookup.java   | 228 +++++++++
> >> >  .../lib/update/MapperForNewDataMod.java         |  82 ++++
> >> >  .../lib/update/MergeSortRowIdMatcher.java       | 372 ++++++++++++++
> >> >  .../lib/update/PrunedBlurInputFormat.java       |  57 +++
> >> >  .../update/PrunedSequenceFileInputFormat.java   |  59 +++
> >> >  .../src/main/resources/blur-site.properties     |   1 +
> >> >  .../src/main/resources/program-log4j.xml        |  29 ++
> >> >  blur-indexer/src/main/resources/test-log4j.xml  |  46 ++
> >> >  18 files changed, 2319 insertions(+)
> >> > ------------------------------------------------------------
> ----------
> >> >
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/pom.xml
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/pom.xml b/blur-indexer/pom.xml
> >> > new file mode 100644
> >> > index 0000000..c7c1753
> >> > --- /dev/null
> >> > +++ b/blur-indexer/pom.xml
> >> > @@ -0,0 +1,58 @@
> >> > +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="
> >> http://www.w3.org/2001/XMLSchema-instance"
> >> > +       xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
> >> http://maven.apache.org/xsd/maven-4.0.0.xsd">
> >> > +       <modelVersion>4.0.0</modelVersion>
> >> > +       <groupId>org.apache.blur</groupId>
> >> > +       <artifactId>blur-indexer</artifactId>
> >> > +       <version>0.2.8</version>
> >> > +       <name>blur-indexer</name>
> >> > +       <packaging>jar</packaging>
> >> > +
> >> > +       <properties>
> >> > +               <blur.version>0.3.0.incubating.2.5.0.cdh5.3.3-
> >> SNAPSHOT</blur.version>
> >> > +       </properties>
> >> > +       <dependencies>
> >> > +               <dependency>
> >> > +                       <groupId>org.apache.blur</groupId>
> >> > +                       <artifactId>blur-mapred</artifactId>
> >> > +                       <version>${blur.version}</version>
> >> > +               </dependency>
> >> > +               <dependency>
> >> > +                       <groupId>junit</groupId>
> >> > +                       <artifactId>junit</artifactId>
> >> > +                       <version>4.9</version>
> >> > +                       <scope>test</scope>
> >> > +               </dependency>
> >> > +       </dependencies>
> >> > +
> >> > +       <build>
> >> > +               <pluginManagement>
> >> > +                       <plugins>
> >> > +                               <plugin>
> >> > +                                       <groupId>org.apache.maven.
> >> plugins</groupId>
> >> > +                                       <artifactId>maven-compiler-
> >> plugin</artifactId>
> >> > +                                       <configuration>
> >> > +                                               <source>1.8</source>
> >> > +                                               <target>1.8</target>
> >> > +                                       </configuration>
> >> > +                               </plugin>
> >> > +                       </plugins>
> >> > +               </pluginManagement>
> >> > +               <plugins>
> >> > +                       <plugin>
> >> > +                               <artifactId>maven-assembly-
> >> plugin</artifactId>
> >> > +                               <configuration>
> >> > +                                       <descriptor>src/main/assemble/
> >> bin.xml</descriptor>
> >> > +                                       <finalName>blur-indexer-${
> >> project.version}</finalName>
> >> > +                               </configuration>
> >> > +                               <executions>
> >> > +                                       <execution>
> >> > +                                               <phase>package</phase>
> >> > +                                               <goals>
> >> > +
> >>  <goal>single</goal>
> >> > +                                               </goals>
> >> > +                                       </execution>
> >> > +                               </executions>
> >> > +                       </plugin>
> >> > +               </plugins>
> >> > +       </build>
> >> > +</project>
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/assemble/bin.xml
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/assemble/bin.xml
> >> b/blur-indexer/src/main/assemble/bin.xml
> >> > new file mode 100644
> >> > index 0000000..5fddd56
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/assemble/bin.xml
> >> > @@ -0,0 +1,45 @@
> >> > +<assembly xmlns="http://maven.apache.org/plugins/maven-assembly-
> >> plugin/assembly/1.1.2"
> >> > +    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
> >> > +           xsi:schemaLocation="http://maven.apache.org/plugins/
> >> maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/
> >> assembly-1.1.2.xsd">
> >> > +  <formats>
> >> > +    <format>tar.gz</format>
> >> > +  </formats>
> >> > +  <includeBaseDirectory>false</includeBaseDirectory>
> >> > +
> >> > +  <dependencySets>
> >> > +    <dependencySet>
> >> > +      <useProjectArtifact>true</useProjectArtifact>
> >> > +      <outputDirectory>blur-indexer-${project.version}/lib</
> >> outputDirectory>
> >> > +      <unpack>false</unpack>
> >> > +      <includes>
> >> > +        <include>org.apache.blur:blur-indexer</include>
> >> > +        <include>org.apache.blur:*</include>
> >> > +        <include>org.apache.zookeeper:zookeeper</include>
> >> > +        <include>org.slf4j:slf4j-api</include>
> >> > +        <include>org.slf4j:slf4j-log4j12</include>
> >> > +        <include>org.json:json</include>
> >> > +        <include>log4j:log4j</include>
> >> > +        <include>com.yammer.metrics:*</include>
> >> > +        <include>com.google.guava:guava</include>
> >> > +        <include>org.apache.httpcomponents:*</include>
> >> > +        <include>org.apache.lucene:*</include>
> >> > +        <include>com.spatial4j:spatial4j</include>
> >> > +        <include>commons-cli:commons-cli</include>
> >> > +        <include>org.eclipse.jetty:*</include>
> >> > +        <include>com.googlecode.concurrentlinkedhashmap:
> >> concurrentlinkedhashmap-lru</include>
> >> > +        <include>jline:jline</include>
> >> > +        <include>com.fasterxml.jackson.core:*</include>
> >> > +      </includes>
> >> > +    </dependencySet>
> >> > +  </dependencySets>
> >> > +
> >> > +  <fileSets>
> >> > +    <fileSet>
> >> > +      <directory>${project.build.scriptSourceDirectory}</directory>
> >> > +      <outputDirectory>blur-indexer-${project.version}</
> >> outputDirectory>
> >> > +      <excludes>
> >> > +        <exclude>**/.empty</exclude>
> >> > +      </excludes>
> >> > +    </fileSet>
> >> > +  </fileSets>
> >> > +</assembly>
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/java/org/apache/blur/
> mapreduce/lib/update/
> >> BlurIndexCounter.java
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/java/
> org/apache/blur/mapreduce/lib/update/BlurIndexCounter.java
> >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/BlurIndexCounter.java
> >> > new file mode 100644
> >> > index 0000000..a9caabb
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/BlurIndexCounter.java
> >> > @@ -0,0 +1,17 @@
> >> > +package org.apache.blur.mapreduce.lib.update;
> >> > +
> >> > +public enum BlurIndexCounter {
> >> > +
> >> > +  NEW_RECORDS,
> >> > +  ROW_IDS_FROM_INDEX,
> >> > +  ROW_IDS_TO_UPDATE_FROM_NEW_DATA,
> >> > +  ROW_IDS_FROM_NEW_DATA,
> >> > +
> >> > +  INPUT_FORMAT_MAPPER,
> >> > +  INPUT_FORMAT_EXISTING_RECORDS,
> >> > +
> >> > +  LOOKUP_MAPPER,
> >> > +  LOOKUP_MAPPER_EXISTING_RECORDS,
> >> > +  LOOKUP_MAPPER_ROW_LOOKUP_ATTEMPT
> >> > +
> >> > +}
> >> >
> >> > http://git-wip-us.apache.org/repos/asf/incubator-blur/blob/
> >> ea50630a/blur-indexer/src/main/java/org/apache/blur/
> mapreduce/lib/update/
> >> ClusterDriver.java
> >> > ------------------------------------------------------------
> ----------
> >> > diff --git a/blur-indexer/src/main/java/
> org/apache/blur/mapreduce/lib/update/ClusterDriver.java
> >> b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/ClusterDriver.java
> >> > new file mode 100644
> >> > index 0000000..d44adf1
> >> > --- /dev/null
> >> > +++ b/blur-indexer/src/main/java/org/apache/blur/mapreduce/lib/
> >> update/ClusterDriver.java
> >> > @@ -0,0 +1,362 @@
> >> > +package org.apache.blur.mapreduce.lib.update;
> >> > +
> >> > +import java.io.ByteArrayInputStream;
> >> > +import java.io.ByteArrayOutputStream;
> >> > +import java.io.IOException;
> >> > +import java.io.InputStream;
> >> > +import java.net.URL;
> >> > +import java.util.HashMap;
> >> > +import java.util.HashSet;
> >> > +import java.util.List;
> >> > +import java.util.Map;
> >> > +import java.util.Map.Entry;
> >> > +import java.util.Set;
> >> > +import java.util.UUID;
> >> > +import java.util.concurrent.Callable;
> >> > +import java.util.concurrent.ExecutionException;
> >> > +import java.util.concurrent.ExecutorService;
> >> > +import java.util.concurrent.Executors;
> >> > +import java.util.concurrent.Future;
> >> > +import java.util.concurrent.TimeUnit;
> >> > +import java.util.concurrent.atomic.AtomicBoolean;
> >> > +
> >> > +import org.apache.blur.log.Log;
> >> > +import org.apache.blur.log.LogFactory;
> >> > +import org.apache.blur.mapreduce.lib.BlurInputFormat;
> >> > +import org.apache.blur.thirdparty.thrift_0_9_0.TException;
> >> > +import org.apache.blur.thrift.BlurClient;
> >> > +import org.apache.blur.thrift.generated.Blur.Iface;
> >> > +import org.apache.blur.thrift.generated.BlurException;
> >> > +import org.apache.blur.thrift.generated.TableDescriptor;
> >> > +import org.apache.blur.thrift.generated.TableStats;
> >> > +import org.apache.blur.utils.BlurConstants;
> >> > +import org.apache.commons.io.IOUtils;
> >> > +import org.apache.hadoop.conf.Configuration;
> >> > +import org.apache.hadoop.conf.Configured;
> >> > +import org.apache.hadoop.fs.FSDataInputStream;
> >> > +import org.apache.hadoop.fs.FileStatus;
> >> > +import org.apache.hadoop.fs.FileSystem;
> >> > +import org.apache.hadoop.fs.Path;
> >> > +import org.apache.hadoop.fs.permission.FsAction;
> >> > +import org.apache.hadoop.mapreduce.Cluster;
> >> > +import org.apache.hadoop.mapreduce.Job;
> >> > +import org.apache.hadoop.mapreduce.JobID;
> >> > +import org.apache.hadoop.mapreduce.JobStatus;
> >> > +import org.apache.hadoop.util.Tool;
> >> > +import org.apache.hadoop.util.ToolRunner;
> >> > +import org.apache.hadoop.yarn.exceptions.YarnException;
> >> > +import org.apache.log4j.LogManager;
> >> > +import org.apache.log4j.xml.DOMConfigurator;
> >> > +
> >> > +public class ClusterDriver extends Configured implements Tool {
> >> > +
> >> > +  private static final String BLUR_ENV = "blur.env";
> >> > +  private static final Log LOG = LogFactory.getLog(
> >> ClusterDriver.class);
> >> > +  private static final String _SEP = "_";
> >> > +  private static final String IMPORT = "import";
> >> > +
> >> > +  public static void main(String[] args) throws Exception {
> >> > +    String logFilePath = System.getenv("BLUR_INDEXER_LOG_FILE");
> >> > +    System.out.println("Log file path [" + logFilePath + "]");
> >> > +    System.setProperty("BLUR_INDEXER_LOG_FILE", logFilePath);
> >> > +    URL url = ClusterDriver.class.getResource("/program-log4j.xml");
> >> > +    if (url != null) {
> >> > +      LOG.info("Reseting log4j config from classpath resource [{0}]",
> >> url);
> >> > +      LogManager.resetConfiguration();
> >> > +      DOMConfigurator.configure(url);
> >> > +    }
> >> > +    int res = ToolRunner.run(new Configuration(), new
> ClusterDriver(),
> >> args);
> >>
> >> Not sure what this thing does yet but it seems we should validate
> >> those args since they're accessed blindly in run...
> >>
> >> --tim
> >>
>

Mime
  • Unnamed multipart/alternative (inline, None, 0 bytes)
View raw message