pig-commits mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From da...@apache.org
Subject svn commit: r982786 - in /hadoop/pig/trunk/contrib/piggybank/java/src: main/java/org/apache/pig/piggybank/storage/partition/ test/java/org/apache/pig/piggybank/test/storage/
Date Thu, 05 Aug 2010 21:07:38 GMT
Author: daijy
Date: Thu Aug  5 21:07:37 2010
New Revision: 982786

URL: http://svn.apache.org/viewvc?rev=982786&view=rev
Log:
PIG-1526: HiveColumnarLoader Partitioning Support (missing some code in last check in)

Added:
    hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/
    hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitionHelper.java
    hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitioner.java
    hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitionHelper.java
    hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitioner.java

Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitionHelper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitionHelper.java?rev=982786&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitionHelper.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitionHelper.java Thu Aug  5 21:07:37 2010
@@ -0,0 +1,419 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.pig.piggybank.storage.partition;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Properties;
+import java.util.Set;
+
+import javax.el.ELContext;
+import javax.el.ELResolver;
+import javax.el.ExpressionFactory;
+import javax.el.FunctionMapper;
+import javax.el.ValueExpression;
+import javax.el.VariableMapper;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.log4j.Logger;
+import org.apache.pig.LoadFunc;
+import org.apache.pig.impl.util.UDFContext;
+
+/**
+ * Implements the logic for:<br/>
+ * <ul>
+ * <li>Listing partition keys and values used in an hdfs path</li>
+ * <li>Filtering of partitions from a pig filter operator expression</li>
+ * </ul>
+ * <p/>
+ * <b>Restrictions</b> <br/>
+ * Function calls are not supported by this partition helper and it can only
+ * handle String values.<br/>
+ * This is normally not a problem given that partition values are part of the
+ * hdfs folder path and are given a<br/>
+ * determined value that would not need parsing by any external processes.<br/>
+ * 
+ * 
+ */
+public class PathPartitionHelper {
+
+    public static final String PARTITION_COLUMNS = PathPartitionHelper.class
+	    + ".partition-columns";
+    public static final String PARITITION_FILTER_EXPRESSION = PathPartitionHelper.class
+	    .getName() + ".partition-filter";
+
+    private static final Logger LOG = Logger
+	    .getLogger(PathPartitionHelper.class);
+
+    transient PathPartitioner pathPartitioner = new PathPartitioner();
+
+    /**
+     * Returns the Partition keys and each key's value for a single location.<br/>
+     * That is the location must be something like
+     * mytable/partition1=a/partition2=b/myfile.<br/>
+     * This method will return a map with [partition1='a', partition2='b']<br/>
+     * The work is delegated to the PathPartitioner class
+     * 
+     * @param location
+     * @return Map of String, String
+     * @throws IOException
+     */
+    public Map<String, String> getPathPartitionKeyValues(String location)
+	    throws IOException {
+	return pathPartitioner.getPathPartitionKeyValues(location);
+    }
+
+    /**
+     * Returns the partition keys for a location.<br/>
+     * The work is delegated to the PathPartitioner class
+     * 
+     * @param location
+     *            String must be the base directory for the partitions
+     * @param conf
+     * @return Set of String, the partition key names found under location
+     * @throws IOException
+     */
+    public Set<String> getPartitionKeys(String location, Configuration conf)
+	    throws IOException {
+	return pathPartitioner.getPartitionKeys(location, conf);
+    }
+
+    /**
+     * Sets the PARITITION_FILTER_EXPRESSION property in the UDFContext
+     * identified by the loaderClass.
+     * 
+     * @param partitionFilterExpression
+     * @param loaderClass
+     * @throws IOException
+     */
+    public void setPartitionFilterExpression(String partitionFilterExpression,
+	    Class<? extends LoadFunc> loaderClass, String signature)
+	    throws IOException {
+
+	UDFContext
+		.getUDFContext()
+		.getUDFProperties(loaderClass, new String[] { signature })
+		.setProperty(PARITITION_FILTER_EXPRESSION,
+			partitionFilterExpression);
+
+    }
+
+    /**
+     * Reads the partition keys from the location i.e the base directory
+     * 
+     * @param location
+     *            String must be the base directory for the partitions
+     * @param conf
+     * @param loaderClass
+     * @throws IOException
+     */
+    public void setPartitionKeys(String location, Configuration conf,
+	    Class<? extends LoadFunc> loaderClass, String signature)
+	    throws IOException {
+
+	Set<String> partitionKeys = getPartitionKeys(location, conf);
+
+	if (partitionKeys != null) {
+	    StringBuilder buff = new StringBuilder();
+	    int i = 0;
+	    for (String key : partitionKeys) {
+		if (i++ != 0) {
+		    buff.append(",");
+		}
+
+		buff.append(key);
+	    }
+
+	    UDFContext.getUDFContext()
+		    .getUDFProperties(loaderClass, new String[] { signature })
+		    .setProperty(PARTITION_COLUMNS, buff.toString());
+	}
+
+    }
+
+    /**
+     * This method is called by the FileInputFormat to find the input paths for
+     * which splits should be calculated.<br/>
+     * If partition keys were set: the partition tree is walked and the
+     * partition filter expression (if set) is used to prune partitions.<br/>
+     * Else null is returned so that the caller can use the default listStatus.
+     * 
+     * @param ctx
+     *            JobContext
+     * @param loaderClass
+     *            this is chosen to be a subclass of LoadFunc to maintain some
+     *            consistency.
+     */
+    public List<FileStatus> listStatus(JobContext ctx,
+	    Class<? extends LoadFunc> loaderClass, String signature)
+	    throws IOException {
+
+	Properties properties = UDFContext.getUDFContext().getUDFProperties(
+		loaderClass, new String[] { signature });
+
+	String partitionExpression = properties
+		.getProperty(PARITITION_FILTER_EXPRESSION);
+
+	ExpressionFactory expressionFactory = null;
+
+	if (partitionExpression != null) {
+	    expressionFactory = ExpressionFactory.newInstance();
+	}
+
+	String partitionColumnStr = properties
+		.getProperty(PathPartitionHelper.PARTITION_COLUMNS);
+	String[] partitionKeys = (partitionColumnStr == null) ? null
+		: partitionColumnStr.split(",");
+
+	Path[] inputPaths = FileInputFormat.getInputPaths(ctx);
+
+	List<FileStatus> splitPaths = null;
+
+	if (partitionKeys != null) {
+
+	    splitPaths = new ArrayList<FileStatus>();
+
+	    for (Path inputPath : inputPaths) {
+		// for each input path work recursively through each partition
+		// level to find the rc files
+
+		FileSystem fs = inputPath.getFileSystem(ctx.getConfiguration());
+
+		if (fs.getFileStatus(inputPath).isDir()) {
+		    // assure that we are at the root of the partition tree.
+		    FileStatus fileStatusArr[] = fs.listStatus(inputPath);
+
+		    if (fileStatusArr != null) {
+			for (FileStatus childFileStatus : fileStatusArr) {
+			    getPartitionedFiles(expressionFactory,
+				    partitionExpression, fs, childFileStatus,
+				    0, partitionKeys, splitPaths);
+			}
+		    }
+
+		} else {
+		    splitPaths.add(fs.getFileStatus(inputPath));
+		}
+
+	    }
+
+	    if (splitPaths.size() < 1) {
+		LOG.error("Not split paths where found, please check that the filter logic for the partition keys does not filter out everything ");
+	    }
+
+	}
+
+	return splitPaths;
+    }
+
+    /**
+     * Recursively works through all directories, skipping filtered partitions.
+     * 
+     * @param fs
+     * @param fileStatus
+     * @param partitionLevel
+     * @param partitionKeys
+     * @param splitPaths
+     * @throws IOException
+     */
+    private void getPartitionedFiles(ExpressionFactory expressionFactory,
+	    String partitionExpression, FileSystem fs, FileStatus fileStatus,
+	    int partitionLevel, String[] partitionKeys,
+	    List<FileStatus> splitPaths) throws IOException {
+
+	String partition = (partitionLevel < partitionKeys.length) ? partitionKeys[partitionLevel]
+		: null;
+
+	Path path = fileStatus.getPath();
+
+	// filter out hidden files
+	if (path.getName().startsWith("_")) {
+	    return;
+	}
+
+	// pre filter logic
+	// return if any of the logic is not true
+	if (partition != null) {
+	    if (fileStatus.isDir()) {
+
+		// check that the dir name starts with the name of the partition
+		// key expected at this level
+		if (!path.getName().startsWith(partition))
+		    return;
+
+	    } else {
+		// else it's a file but not at the end of the partition tree so
+		// it's ignored.
+		return;
+	    }
+
+	    // this means we are inside the partition so that the path will
+	    // contain all partitions plus its values
+	    // we can apply the partition filter expression here that was passed
+	    // to the HiveColumnarLoader.setPartitionExpression
+	    if (partitionLevel == (partitionKeys.length - 1)
+		    && !evaluatePartitionExpression(expressionFactory,
+			    partitionExpression, path)) {
+
+		LOG.debug("Pruning partition: " + path);
+		return;
+
+	    }
+
+	}
+
+	// after this point we know that the partition is either null
+	// which means we are at the end of the partition tree and all files and
+	// sub directories should be included.
+	// or that we are still navigating the partition tree.
+	int nextPartitionLevel = partitionLevel + 1;
+
+	// iterate over directories if fileStatus is a dir.
+	FileStatus[] childStatusArr = null;
+
+	if (fileStatus.isDir()) {
+	    if ((childStatusArr = fs.listStatus(path)) != null) {
+		for (FileStatus childFileStatus : childStatusArr) {
+		    getPartitionedFiles(expressionFactory, partitionExpression,
+			    fs, childFileStatus, nextPartitionLevel,
+			    partitionKeys, splitPaths);
+		}
+	    }
+	} else {
+	    // add file to splitPaths
+	    splitPaths.add(fileStatus);
+	}
+
+    }
+
+    /**
+     * Evaluates the partitionExpression set in the
+     * HiveColumnarLoader.setPartitionExpression.
+     * 
+     * @param partitionExpression
+     *            String
+     * @param path
+     *            Path
+     * @return boolean
+     * @throws IOException
+     */
+    private boolean evaluatePartitionExpression(
+	    ExpressionFactory expressionFactory, String partitionExpression,
+	    Path path) throws IOException {
+
+	boolean ret = true;
+
+	if (expressionFactory != null) {
+	    if (!partitionExpression.startsWith("${")) {
+		partitionExpression = "${" + partitionExpression + "}";
+	    }
+
+	    Map<String, String> context = pathPartitioner
+		    .getPathPartitionKeyValues(path.toString());
+
+	    MapVariableMapper mapper = new MapVariableMapper(expressionFactory,
+		    context);
+	    VariableContext varContext = new VariableContext(mapper);
+
+	    ValueExpression evalExpression = expressionFactory
+		    .createValueExpression(varContext, partitionExpression,
+			    Boolean.class);
+
+	    ret = (Boolean) evalExpression.getValue(varContext);
+
+	    LOG.debug("Evaluated: " + partitionExpression + " returned: " + ret);
+
+	}
+
+	return ret;
+    }
+
+    /**
+     * 
+     * ELContext implementation containing the VariableMapper MapVariableMapper
+     * 
+     */
+    class VariableContext extends ELContext {
+
+	VariableMapper variableMapper;
+
+	VariableContext(VariableMapper variableMapper) {
+	    this.variableMapper = variableMapper;
+	}
+
+	@Override
+	public ELResolver getELResolver() {
+	    // TODO Auto-generated method stub
+	    return null;
+	}
+
+	@Override
+	public FunctionMapper getFunctionMapper() {
+	    return null;
+	}
+
+	@Override
+	public VariableMapper getVariableMapper() {
+	    return variableMapper;
+	}
+
+    }
+
+    /**
+     * Implementation for the VariableMapper that takes the values in a Map and
+     * creates ValueExpression objects for each.
+     * 
+     */
+    class MapVariableMapper extends VariableMapper {
+	private Map<String, ValueExpression> valueExpressionMap;
+
+	public MapVariableMapper(ExpressionFactory expressionFactory,
+		Map<String, String> variableMap) {
+
+	    valueExpressionMap = new HashMap<String, ValueExpression>();
+
+	    for (Entry<String, String> entry : variableMap.entrySet()) {
+		ValueExpression valExpr = expressionFactory
+			.createValueExpression(entry.getValue(), String.class);
+		valueExpressionMap.put(entry.getKey(), valExpr);
+	    }
+
+	}
+
+	@Override
+	public ValueExpression resolveVariable(String variableName) {
+	    return valueExpressionMap.get(variableName);
+	}
+
+	@Override
+	public ValueExpression setVariable(String variableName,
+		ValueExpression valueExpression) {
+	    return valueExpressionMap.put(variableName, valueExpression);
+	}
+
+    }
+
+}

Added: hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitioner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitioner.java?rev=982786&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitioner.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/main/java/org/apache/pig/piggybank/storage/partition/PathPartitioner.java Thu Aug  5 21:07:37 2010
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.pig.piggybank.storage.partition;
+
+import java.io.IOException;
+import java.util.LinkedHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+/**
+ * 
+ * It is sometimes convenient to partition logs by date values or other e.g.
+ * country, city etc.<br/>
+ * A daydate partitioned hdfs directory might look something like:<br/>
+ * 
+ * <pre>
+ * /logs/repo/mylog/
+ * 					daydate=2010-01-01
+ * 				    daydate=2010-01-02
+ * </pre>
+ * 
+ * This class accepts a path like /logs/repo/mylog and returns a map of the
+ * partition keys
+ */
+public class PathPartitioner {
+
+    /**
+     * Searches for the key=value pairs in the path string given by the
+     * location parameter.<br/>
+     * Note: only the string is parsed, so location must be the lowest path in
+     * the partition tree for every key=value pair to be found.
+     * 
+     * @param location
+     *            String path in hdfs containing the key=value directories e.g.
+     *            /logs/repo/year=2010/month=07
+     * @return Map of String, String (key to value). The order is maintained as
+     *         per the directory tree. i.e. for /logs/repo/year=2010/month=07
+     *         the first entry will be year and the second month.
+     * @throws IOException
+     */
+    public Map<String, String> getPathPartitionKeyValues(String location)
+	    throws IOException {
+
+	// use LinkedHashMap because order is important here.
+	Map<String, String> partitionKeys = new LinkedHashMap<String, String>();
+
+	String[] pathSplit = location.split("/");
+
+	for (String pathSplitItem : pathSplit) {
+	    parseAndPutKeyValue(pathSplitItem, partitionKeys);
+	}
+
+	return partitionKeys;
+    }
+
+    /**
+     * Searches for the key=value pairs in the path pointed to by the location
+     * parameter.
+     * 
+     * @param location
+     *            String root path in hdfs e.g. /user/hive/warehouse or
+     *            /logs/repo
+     * @param conf
+     *            Configuration
+     * @return Set of String. The order is maintained as per the directory tree.
+     *         i.e. if /logs/repo/year=2010/month=2010 exists the first item in
+     *         the set will be year and the second month.
+     * @throws IOException
+     */
+    public Set<String> getPartitionKeys(String location, Configuration conf)
+	    throws IOException {
+
+	// find the hive type partition key=value pairs from the path.
+	// first parse the string alone.
+	Path path = new Path(location);
+	FileSystem fs = path.getFileSystem(conf);
+
+	FileStatus[] fileStatusArr = null;
+
+	// use LinkedHashSet because order is important here.
+	Set<String> partitionKeys = new LinkedHashSet<String>();
+
+	parseAndPutKeyValue(location, partitionKeys);
+
+	while (!((fileStatusArr = fs.listStatus(path)) == null || fs
+		.isFile(path))) {
+	    for (FileStatus fileStatus : fileStatusArr) {
+
+		path = fileStatus.getPath();
+
+		// ignore hidden entries and anything that is not a directory
+		if (fileStatus.getPath().getName().startsWith("_")
+			|| !fileStatus.isDir())
+		    continue;
+
+		parseAndPutKeyValue(path.getName(), partitionKeys);
+		// at the first directory found stop the for loop after parsing
+		// for key value pairs
+		break;
+	    }
+
+	}
+
+	return partitionKeys;
+    }
+
+    private final void parseAndPutKeyValue(String pathName,
+	    Map<String, String> partitionKeys) {
+	String[] keyValue = parsePathKeyValue(pathName);
+	if (keyValue != null) {
+	    partitionKeys.put(keyValue[0], keyValue[1]);
+	}
+
+    }
+
+    private final void parseAndPutKeyValue(String pathName,
+	    Set<String> partitionKeys) {
+	String[] keyValue = parsePathKeyValue(pathName);
+	if (keyValue != null) {
+	    partitionKeys.add(keyValue[0]);
+	}
+
+    }
+
+    /**
+     * Will look for key=value pairs in the path for example:
+     * /user/hive/warehouse/mylogs/year=2010/month=07
+     * 
+     * @param path
+     * @return String[] [0]= key [1] = value
+     */
+    public String[] parsePathKeyValue(String path) {
+	int slashIndex = path.lastIndexOf('/');
+	String parsedPath = path;
+	String[] keyValue = null;
+
+	if (slashIndex > 0) {
+	    parsedPath = path.substring(slashIndex);
+	}
+
+	if (parsedPath.contains("=")) {
+	    String split[] = parsedPath.split("=");
+	    if (split.length == 2) {
+		keyValue = split;
+	    }
+	}
+
+	return keyValue;
+    }
+
+}

Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitionHelper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitionHelper.java?rev=982786&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitionHelper.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitionHelper.java Thu Aug  5 21:07:37 2010
@@ -0,0 +1,162 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.pig.piggybank.test.storage;
+
+import java.io.File;
+import java.util.List;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileStatus;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
+import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
+import org.apache.pig.builtin.PigStorage;
+import org.apache.pig.piggybank.storage.partition.PathPartitionHelper;
+import org.apache.pig.test.Util;
+import org.junit.Test;
+
+/**
+ * 
+ * Tests the PathPartitionHelper can:<br/>
+ * <ul>
+ * <li>Filter path partitioned files based on an expression being true</li>
+ * <li>Filter path partitioned files based on an expression being false</li>
+ * <li>Filter path partitioned files with no expression</li>
+ * </ul>
+ * 
+ */
+public class TestPathPartitionHelper extends TestCase {
+
+    private static Configuration conf = null;
+
+    File baseDir;
+    File partition1;
+    File partition2;
+    File partition3;
+
+    @Test
+    public void testListStatusPartitionFilterNotFound() throws Exception {
+
+	PathPartitionHelper partitionHelper = new PathPartitionHelper();
+
+	Job job = new Job(conf);
+	job.setJobName("TestJob");
+	job.setInputFormatClass(FileInputFormat.class);
+
+	Configuration conf = job.getConfiguration();
+	FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));
+
+	JobContext jobContext = new JobContext(conf, job.getJobID());
+
+	partitionHelper.setPartitionFilterExpression("year < '2010'",
+		PigStorage.class, "1");
+	partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf,
+		PigStorage.class, "1");
+
+	List<FileStatus> files = partitionHelper.listStatus(jobContext,
+		PigStorage.class, "1");
+
+	assertEquals(0, files.size());
+
+    }
+
+    @Test
+    public void testListStatusPartitionFilterFound() throws Exception {
+
+	PathPartitionHelper partitionHelper = new PathPartitionHelper();
+
+	Job job = new Job(conf);
+	job.setJobName("TestJob");
+	job.setInputFormatClass(FileInputFormat.class);
+
+	Configuration conf = job.getConfiguration();
+	FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));
+
+	JobContext jobContext = new JobContext(conf, job.getJobID());
+
+	partitionHelper.setPartitionFilterExpression(
+		"year<='2010' and month=='01' and day>='01'", PigStorage.class, "2");
+	partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf,
+		PigStorage.class, "2");
+
+	List<FileStatus> files = partitionHelper.listStatus(jobContext,
+		PigStorage.class, "2");
+
+	assertNotNull(files);
+	assertEquals(1, files.size());
+
+    }
+
+    @Test
+    public void testListStatus() throws Exception {
+
+	PathPartitionHelper partitionHelper = new PathPartitionHelper();
+
+	Job job = new Job(conf);
+	job.setJobName("TestJob");
+	job.setInputFormatClass(FileInputFormat.class);
+
+	Configuration conf = job.getConfiguration();
+	FileInputFormat.setInputPaths(job, new Path(baseDir.getAbsolutePath()));
+
+	JobContext jobContext = new JobContext(conf, job.getJobID());
+
+	partitionHelper.setPartitionKeys(baseDir.getAbsolutePath(), conf,
+		PigStorage.class, "3");
+
+	List<FileStatus> files = partitionHelper.listStatus(jobContext,
+		PigStorage.class, "3");
+
+	assertNotNull(files);
+	assertEquals(1, files.size());
+
+    }
+
+    @Override
+    protected void tearDown() throws Exception {
+
+	Util.deleteDirectory(baseDir);
+
+    }
+
+    @Override
+    protected void setUp() throws Exception {
+	conf = new Configuration(false);
+
+	baseDir = createDir(null,
+		"testPathPartitioner-testGetKeys-" + System.currentTimeMillis());
+
+	partition1 = createDir(baseDir, "year=2010");
+	partition2 = createDir(partition1, "month=01");
+	partition3 = createDir(partition2, "day=01");
+
+	File file = new File(partition3, "testfile-"
+		+ System.currentTimeMillis());
+	file.createNewFile();
+
+    }
+
+    private File createDir(File parent, String name) {
+	File file = new File(parent, name);
+	file.mkdirs();
+	return file;
+    }
+
+}

Added: hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitioner.java
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitioner.java?rev=982786&view=auto
==============================================================================
--- hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitioner.java (added)
+++ hadoop/pig/trunk/contrib/piggybank/java/src/test/java/org/apache/pig/piggybank/test/storage/TestPathPartitioner.java Thu Aug  5 21:07:37 2010
@@ -0,0 +1,111 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.pig.piggybank.test.storage;
+
+import java.io.File;
+import java.util.Map;
+import java.util.Set;
+
+import junit.framework.TestCase;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.pig.piggybank.storage.partition.PathPartitioner;
+import org.apache.pig.test.Util;
+import org.junit.Test;
+
+/**
+ * 
+ * Tests that the PathPartitioner can:<br/>
+ * <ul>
+ *   <li>Read keys from a partitioned file path</li>
+ *   <li>Read keys and values from a partitioned file path</li>
+ * </ul>
+ *
+ */
+public class TestPathPartitioner extends TestCase {
+
+    private static Configuration conf = null;
+
+    File baseDir;
+    File partition1;
+    File partition2;
+    File partition3;
+
+    @Override
+    protected void tearDown() throws Exception {
+
+	Util.deleteDirectory(baseDir);
+
+    }
+
+    @Override
+    protected void setUp() throws Exception {
+	conf = new Configuration();
+
+	baseDir = createDir(null,
+		"testPathPartitioner-testGetKeys-" + System.currentTimeMillis());
+
+	partition1 = createDir(baseDir, "year=2010");
+	partition2 = createDir(partition1, "month=01");
+	partition3 = createDir(partition2, "day=01");
+
+	File file = new File(partition3, "testfile-"
+		+ System.currentTimeMillis());
+	file.createNewFile();
+
+    }
+
+    @Test
+    public void testGetKeyValues() throws Exception {
+	PathPartitioner partitioner = new PathPartitioner();
+
+	Map<String, String> map = partitioner
+		.getPathPartitionKeyValues(partition3.getAbsolutePath());
+
+	String[] keys = map.keySet().toArray(new String[] {});
+
+	assertEquals("2010", map.get(keys[0]));
+	assertEquals("01", map.get(keys[1]));
+	assertEquals("01", map.get(keys[2]));
+
+    }
+
+    @Test
+    public void testGetKeys() throws Exception {
+
+	PathPartitioner pathPartitioner = new PathPartitioner();
+	Set<String> keys = pathPartitioner.getPartitionKeys(
+		baseDir.getAbsolutePath(), conf);
+
+	assertNotNull(keys);
+	assertEquals(3, keys.size());
+
+	String[] keyArr = keys.toArray(new String[] {});
+
+	assertEquals("year", keyArr[0]);
+	assertEquals("month", keyArr[1]);
+	assertEquals("day", keyArr[2]);
+
+    }
+
+    private File createDir(File parent, String name) {
+	File file = new File(parent, name);
+	file.mkdirs();
+	return file;
+    }
+
+}



Mime
View raw message