carbondata-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From lionelcao <...@git.apache.org>
Subject [GitHub] carbondata pull request #1192: [CARBONDATA-940] alter table add/split partit...
Date Thu, 03 Aug 2017 07:09:21 GMT
Github user lionelcao commented on a diff in the pull request:

    https://github.com/apache/carbondata/pull/1192#discussion_r131067082
  
    --- Diff: hadoop/src/main/java/org/apache/carbondata/hadoop/api/CarbonTableInputFormat.java ---
    @@ -321,6 +321,84 @@ private AbsoluteTableIdentifier getAbsoluteTableIdentifier(Configuration configu
       }
     
       /**
    +   * Read data in one segment. For alter table partition statement
    +   * @param job
    +   * @param targetSegment
    +   * @param oldPartitionIdList  get old partitionId before partitionInfo was changed
    +   * @return
    +   * @throws IOException
    +   */
    +  public List<InputSplit> getSplitsOfOneSegment(JobContext job, String targetSegment,
    +      List<Integer> oldPartitionIdList, PartitionInfo partitionInfo)
    +      throws IOException {
    +    AbsoluteTableIdentifier identifier = getAbsoluteTableIdentifier(job.getConfiguration());
    +    List<String> invalidSegments = new ArrayList<>();
    +    List<UpdateVO> invalidTimestampsList = new ArrayList<>();
    +
    +    List<String> segmentList = new ArrayList<>();
    +    segmentList.add(targetSegment);
    +    setSegmentsToAccess(job.getConfiguration(), segmentList);
    +    try {
    +
    +      // process and resolve the expression
    +      Expression filter = getFilterPredicates(job.getConfiguration());
    +      CarbonTable carbonTable = getOrCreateCarbonTable(job.getConfiguration());
    +      // this will be null in case of corrupt schema file.
    +      if (null == carbonTable) {
    +        throw new IOException("Missing/Corrupt schema file for table.");
    +      }
    +
    +      CarbonInputFormatUtil.processFilterExpression(filter, carbonTable);
    +
    +      // prune partitions for filter query on partition table
    +      String partitionIds = job.getConfiguration().get(ALTER_PARTITION_ID);
    +      BitSet matchedPartitions = null;
    +      if (partitionInfo != null) {
    +        matchedPartitions = setMatchedPartitions(partitionIds, filter, partitionInfo);
    +        if (matchedPartitions != null) {
    +          if (matchedPartitions.cardinality() == 0) {
    +            return new ArrayList<InputSplit>();
    +          } else if (matchedPartitions.cardinality() == partitionInfo.getNumPartitions()) {
    +            matchedPartitions = null;
    +          }
    +        }
    +      }
    +
    +      FilterResolverIntf filterInterface = CarbonInputFormatUtil.resolveFilter(filter, identifier);
    +      // do block filtering and get split
    +      List<InputSplit> splits = getSplits(job, filterInterface, segmentList, matchedPartitions,
    +          partitionInfo, oldPartitionIdList);
    +      // pass the invalid segment to task side in order to remove index entry in task side
    +      if (invalidSegments.size() > 0) {
    +        for (InputSplit split : splits) {
    +          ((CarbonInputSplit) split).setInvalidSegments(invalidSegments);
    +          ((CarbonInputSplit) split).setInvalidTimestampRange(invalidTimestampsList);
    +        }
    +      }
    +      return splits;
    +    } catch (IOException e) {
    +      throw new RuntimeException("Can't get splits of the target segment ", e);
    +    }
    +  }
    +
    +  private BitSet setMatchedPartitions(String partitionIds, Expression filter,
    +      PartitionInfo partitionInfo) {
    +    BitSet matchedPartitions = null;
    +    if (null != partitionIds) {
    +      String[] partList = partitionIds.replace("[", "").replace("]", "").split(",");
    +      matchedPartitions = new BitSet(Integer.parseInt(partList[0]));
    --- End diff --
    
    Sure, I can add some simple comments in the code.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and wishes so, or if the feature is enabled but not working, please
contact infrastructure at infrastructure@apache.org or file a JIRA ticket
with INFRA.
---

Mime
View raw message