flink-issues mailing list archives

Site index · List index
Message view « Date » · « Thread »
Top « Date » · « Thread »
From "ASF GitHub Bot (JIRA)" <j...@apache.org>
Subject [jira] [Commented] (FLINK-2314) Make Streaming File Sources Persistent
Date Tue, 25 Aug 2015 15:46:47 GMT

    [ https://issues.apache.org/jira/browse/FLINK-2314?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=14711480#comment-14711480 ]

ASF GitHub Bot commented on FLINK-2314:
---------------------------------------

Github user tillrohrmann commented on a diff in the pull request:

    https://github.com/apache/flink/pull/997#discussion_r37881537
  
    --- Diff: flink-staging/flink-streaming/flink-streaming-core/src/test/java/org/apache/flink/streaming/util/FileSourceFunctionTest.java ---
    @@ -0,0 +1,208 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.flink.streaming.util;
    +
    +import org.apache.flink.api.common.ExecutionConfig;
    +import org.apache.flink.api.common.accumulators.Accumulator;
    +import org.apache.flink.api.common.functions.RuntimeContext;
    +import org.apache.flink.api.common.io.FileInputFormat;
    +import org.apache.flink.api.java.functions.KeySelector;
    +import org.apache.flink.api.java.typeutils.TypeExtractor;
    +import org.apache.flink.configuration.Configuration;
    +import org.apache.flink.core.fs.FileInputSplit;
    +import org.apache.flink.core.fs.Path;
    +import org.apache.flink.core.io.InputSplit;
    +import org.apache.flink.runtime.operators.testutils.MockEnvironment;
    +import org.apache.flink.runtime.operators.testutils.MockInputSplitProvider;
    +import org.apache.flink.runtime.state.LocalStateHandle;
    +import org.apache.flink.streaming.api.functions.source.FileSourceFunction;
    +import org.apache.flink.streaming.api.functions.source.SourceFunction;
    +import org.apache.flink.streaming.api.watermark.Watermark;
    +import org.apache.flink.streaming.runtime.tasks.StreamingRuntimeContext;
    +import org.apache.flink.types.IntValue;
    +import org.junit.Assert;
    +import org.junit.Test;
    +
    +import java.io.IOException;
    +import java.io.Serializable;
    +import java.util.ArrayList;
    +import java.util.HashMap;
    +import java.util.List;
    +
    +public class FileSourceFunctionTest {
    +	@Test
    +	public void testFileSourceFunction() {
    +		DummyFileInputFormat inputFormat = new DummyFileInputFormat();
    +		RuntimeContext runtimeContext = new StreamingRuntimeContext("MockTask", new MockEnvironment(3
* 1024 * 1024,
    +				inputFormat.getDummyInputSplitProvider(), 1024), null, new ExecutionConfig(), new
DummyModKey(2),
    +				new LocalStateHandle.LocalStateHandleProvider<Serializable>(), new HashMap<String,
Accumulator<?, ?>>());
    +
    +		inputFormat.setFilePath("file:///some/none/existing/directory/");
    +		FileSourceFunction<IntValue> fileSourceFunction = new FileSourceFunction<IntValue>(inputFormat,
TypeExtractor.getInputFormatTypes(inputFormat));
    +
    +		fileSourceFunction.setRuntimeContext(runtimeContext);
    +		DummyContext<IntValue> ctx = new DummyContext<IntValue>();
    +		try {
    +			fileSourceFunction.open(new Configuration());
    +			fileSourceFunction.run(ctx);
    +		} catch (Exception e) {
    +			e.printStackTrace();
    +		}
    +		Assert.assertTrue(ctx.getData().size() == 200);
    +	}
    +
    +	@Test
    +	public void testFileSourceFunctionCheckpoint() {
    +		DummyFileInputFormat inputFormat = new DummyFileInputFormat();
    +		RuntimeContext runtimeContext = new StreamingRuntimeContext("MockTask", new MockEnvironment(3
* 1024 * 1024,
    +				inputFormat.getDummyInputSplitProvider(), 1024), null, new ExecutionConfig(), new
DummyModKey(2),
    +				new LocalStateHandle.LocalStateHandleProvider<Serializable>(), new HashMap<String,
Accumulator<?, ?>>());
    +
    +		inputFormat.setFilePath("file:///some/none/existing/directory/");
    +		FileSourceFunction<IntValue> fileSourceFunction = new FileSourceFunction<IntValue>(inputFormat,
TypeExtractor.getInputFormatTypes(inputFormat));
    +		fileSourceFunction.setRuntimeContext(runtimeContext);
    +		DummyContext<IntValue> ctx = new DummyContext<IntValue>();
    +		try {
    +			fileSourceFunction.open(new Configuration());
    +			fileSourceFunction.restoreState("100:1");
    +			fileSourceFunction.run(ctx);
    +		} catch (Exception e) {
    +			e.printStackTrace();
    --- End diff --
    
    Same here as above.


> Make Streaming File Sources Persistent
> --------------------------------------
>
>                 Key: FLINK-2314
>                 URL: https://issues.apache.org/jira/browse/FLINK-2314
>             Project: Flink
>          Issue Type: Improvement
>          Components: Streaming
>    Affects Versions: 0.9
>            Reporter: Stephan Ewen
>            Assignee: Sheetal Parade
>              Labels: easyfix, starter
>
> Streaming File sources should participate in the checkpointing. They should track the
> bytes they read from the file and checkpoint it.
> One can look at the sequence generating source function for an example of a checkpointed
> source.



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

Mime
View raw message