Pull request #1 (Open): Mateusz wants to merge 21 commits into base: main.
src/main/java/Q6_ITERQueryI1T3.java (new file, 113 additions)
@@ -0,0 +1,113 @@
import org.apache.flink.api.common.JobExecutionResult;
Owner comment: Q6 is already there under a different name, and I took your Q8 classes, so in essence everything should be there now and we can close this PR.

import org.apache.flink.api.common.functions.FlatJoinFunction;
import org.apache.flink.api.java.tuple.*;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import util.*;

import java.util.HashSet;
import java.util.concurrent.TimeUnit;

/**
 * Run with these parameters (defaults in parentheses):
 * --input ./src/main/resources/QnV.csv
 * --output result path (derived from the input path if omitted)
 * --tput throughput setting passed to the source (0)
 * --vel minimum velocity accepted by the filter (205)
 * --wsize window size in minutes (15)
 */
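// Example invocation (assuming the standard Flink CLI; the jar name is illustrative):
//   flink run q6.jar --input ./src/main/resources/QnV.csv --vel 205 --wsize 15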

public class Q6_ITERQueryI1T3 {
public static void main(String[] args) throws Exception {

final ParameterTool parameters = ParameterTool.fromArgs(args);
// Checking input parameters
if (!parameters.has("input")) {
throw new Exception("Input Data is not specified");
}

String file = parameters.get("input");
String outputPath;
long throughput = parameters.getLong("tput", 0);
int times = parameters.getInt("times", 3);
Integer velFilter = parameters.getInt("vel", 205);
Integer windowSize = parameters.getInt("wsize", 15);

if (!parameters.has("output")) {
outputPath = file.replace(".csv", "_resultQ6_I1T3_ASP.csv");
} else {
outputPath = parameters.get("output");
}

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

DataStream<KeyedDataPointGeneral> input = env.addSource(new KeyedDataPointSourceFunction(file, throughput))
.assignTimestampsAndWatermarks(new UDFs.ExtractTimestamp(60000));

input.flatMap(new ThroughputLogger<KeyedDataPointGeneral>(KeyedDataPointSourceFunction.RECORD_SIZE_IN_BYTE, throughput));

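// keep only velocity events at or above the threshold; UDFs.MapKey presumably attaches the artificial key used by the self-join below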
DataStream<Tuple2<KeyedDataPointGeneral, Integer>> velStream = input
.filter(t -> ((Double) t.getValue()) >= velFilter && (t instanceof VelocityEvent))
.map(new UDFs.MapKey());

// iteration 2: self-join the velocity stream over a sliding window, pairing events where the later event also has the higher velocity
DataStream<Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>> it2 = velStream.join(velStream)
.where(new UDFs.getArtificalKey())
.equalTo(new UDFs.getArtificalKey())
.window(SlidingEventTimeWindows.of(Time.minutes(windowSize), Time.minutes(1)))
.apply(new FlatJoinFunction<Tuple2<KeyedDataPointGeneral, Integer>, Tuple2<KeyedDataPointGeneral, Integer>, Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>>() {
final HashSet<Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>> set = new HashSet<Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>>(1000);
@Override
public void join(Tuple2<KeyedDataPointGeneral, Integer> d1, Tuple2<KeyedDataPointGeneral, Integer> d2, Collector<Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>> collector) throws Exception {
if (d1.f0.getTimeStampMs() < d2.f0.getTimeStampMs() && (Double) d1.f0.getValue() < (Double) d2.f0.getValue()) {
Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer> result = new Tuple4<>(d1.f0, d2.f0, d1.f0.getTimeStampMs(), 1);
if (!set.contains(result)) {
if (set.size() == 1000) {
// flush the set once it reaches 1000 entries to bound its memory footprint
set.clear();
}
collector.collect(result);
set.add(result);
}
}
}
}).assignTimestampsAndWatermarks(new UDFs.ExtractTimestamp2KeyedDataPointGeneralLongInt(60000));

DataStream<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>> it3 = it2.join(velStream)
.where(new UDFs.getArtificalKeyT4())
.equalTo(new UDFs.getArtificalKey())
.window(SlidingEventTimeWindows.of(Time.minutes(windowSize), Time.minutes(1)))
.apply(new FlatJoinFunction<Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer>, Tuple2<KeyedDataPointGeneral, Integer>, Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>>() {
final HashSet<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>> set = new HashSet<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>>(1000);
@Override
public void join(Tuple4<KeyedDataPointGeneral, KeyedDataPointGeneral, Long, Integer> d1, Tuple2<KeyedDataPointGeneral, Integer> d2, Collector<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>> collector) throws Exception {
if (d1.f1.getTimeStampMs() < d2.f0.getTimeStampMs() && (Double) d1.f1.getValue() < (Double) d2.f0.getValue()) {
Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral> result = new Tuple3<>(d1.f0, d1.f1, d2.f0);
if (!set.contains(result)) {
if (set.size() == 1000) {
// flush the set once it reaches 1000 entries to bound its memory footprint
set.clear();
}
collector.collect(result);
set.add(result);

}
}
}
});

it3.flatMap(new LatencyLoggerT3());
it3.writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE); // swap in it3.print() to inspect results on stdout

JobExecutionResult executionResult = env.execute("My FlinkASP Job");
System.out.println("The job took " + executionResult.getNetRuntime(TimeUnit.MILLISECONDS) + "ms to execute");

}
}
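Both FlatJoinFunctions above deduplicate with a HashSet that is flushed once it reaches 1000 entries. A minimal standalone sketch of that pattern (a hypothetical helper for illustration, not part of this PR):

import java.util.HashSet;

// Hypothetical helper mirroring the flush-at-capacity deduplication used
// inside the join functions above.
final class BoundedDedupSet<T> {
    private final HashSet<T> seen;
    private final int capacity;

    BoundedDedupSet(int capacity) {
        this.capacity = capacity;
        this.seen = new HashSet<>(capacity);
    }

    // Returns true (and records the element) if it was not seen since the
    // last flush; after a flush, old duplicates may be emitted again.
    boolean addIfNew(T element) {
        if (seen.contains(element)) {
            return false;
        }
        if (seen.size() == capacity) {
            seen.clear(); // flush to bound memory
        }
        seen.add(element);
        return true;
    }
}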

src/main/java/Q6_ITERQueryI1T3_IntervalJoin.java (new file, 90 additions)
@@ -0,0 +1,90 @@
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.java.tuple.*;
import org.apache.flink.api.java.utils.ParameterTool;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import util.*;

import java.util.concurrent.TimeUnit;

/**
 * Run with these parameters (defaults in parentheses):
 * --input ./src/main/resources/QnV.csv
 * --output result path (derived from the input path if omitted)
 * --tput throughput setting passed to the source (0)
 * --vel minimum velocity accepted by the filter (205)
 * --wsize window size in minutes (15)
 */

public class Q6_ITERQueryI1T3_IntervalJoin {
public static void main(String[] args) throws Exception {

final ParameterTool parameters = ParameterTool.fromArgs(args);
// Checking input parameters
if (!parameters.has("input")) {
throw new Exception("Input Data is not specified");
}

String file = parameters.get("input");
String outputPath;
long throughput = parameters.getLong("tput", 0);
int times = parameters.getInt("times", 3);
Integer velFilter = parameters.getInt("vel", 205);
Integer windowSize = parameters.getInt("wsize", 15);

if (!parameters.has("output")) {
outputPath = file.replace(".csv", "_resultQ6_I1T3_ASP.csv");
} else {
outputPath = parameters.get("output");
}

StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

DataStream<KeyedDataPointGeneral> input = env.addSource(new KeyedDataPointSourceFunction(file, throughput))
.assignTimestampsAndWatermarks(new UDFs.ExtractTimestamp(60000));

input.flatMap(new ThroughputLogger<KeyedDataPointGeneral>(KeyedDataPointSourceFunction.RECORD_SIZE_IN_BYTE, throughput));

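// keep only velocity events at or above the threshold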
DataStream<KeyedDataPointGeneral> velStream = input
.filter(t -> ((Double) t.getValue()) >= velFilter && (t instanceof VelocityEvent));

// iteration 2: interval self-join of the velocity stream, pairing events where the later event also has the higher velocity
DataStream<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>> it2 = velStream.keyBy(KeyedDataPointGeneral::getKey)
.intervalJoin(velStream.keyBy(KeyedDataPointGeneral::getKey))
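// match each d1 with d2 events occurring 1 s to (windowSize*60)-1 s later, covering the same span as the windowSize-minute window in the window-join variant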
.between(Time.seconds(1), Time.seconds((windowSize*60)-1))
.process(new ProcessJoinFunction<KeyedDataPointGeneral, KeyedDataPointGeneral, Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>>() {
@Override
public void processElement(KeyedDataPointGeneral d1, KeyedDataPointGeneral d2, ProcessJoinFunction<KeyedDataPointGeneral, KeyedDataPointGeneral, Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>>.Context context, Collector<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>> collector) throws Exception {
if (d1.getTimeStampMs() < d2.getTimeStampMs() && (Double) d1.getValue() < (Double) d2.getValue()) {
collector.collect(new Tuple3<>(d1, d2, d1.getTimeStampMs()));
}
}
}).assignTimestampsAndWatermarks(new UDFs.ExtractTimestamp2KeyedDataPointGeneralLong(60000));

DataStream<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>> it3 = it2.keyBy(new UDFs.getKeyT3())
.intervalJoin(velStream.keyBy(KeyedDataPointGeneral::getKey))
.between(Time.seconds(1), Time.seconds((windowSize*60)-1))
.process(new ProcessJoinFunction<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>, KeyedDataPointGeneral, Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>>() {
@Override
public void processElement(Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long> d1, KeyedDataPointGeneral d2, ProcessJoinFunction<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, Long>, KeyedDataPointGeneral, Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>>.Context context, Collector<Tuple3<KeyedDataPointGeneral, KeyedDataPointGeneral, KeyedDataPointGeneral>> collector) throws Exception {
if (d1.f1.getTimeStampMs() < d2.getTimeStampMs() && (Double) d1.f1.getValue() < (Double) d2.getValue()) {
collector.collect(new Tuple3<>(d1.f0, d1.f1, d2));
}
}
});

it3.flatMap(new LatencyLoggerT3());
it3.writeAsText(outputPath, FileSystem.WriteMode.OVERWRITE); // swap in it3.print() to inspect results on stdout

JobExecutionResult executionResult = env.execute("My FlinkASP Job");
System.out.println("The job took " + executionResult.getNetRuntime(TimeUnit.MILLISECONDS) + "ms to execute");

}
}
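Both jobs assign timestamps and watermarks via UDFs.ExtractTimestamp(60000) and its tuple variants from the util package, which are not part of this diff. A minimal sketch of what such an extractor presumably looks like, assuming it is a bounded-out-of-orderness extractor with a 60-second bound:

import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;

// Sketch only; the real UDFs.ExtractTimestamp lives in the util package.
class ExtractTimestampSketch extends BoundedOutOfOrdernessTimestampExtractor<KeyedDataPointGeneral> {
    ExtractTimestampSketch(long maxOutOfOrdernessMs) {
        super(Time.milliseconds(maxOutOfOrdernessMs));
    }

    @Override
    public long extractTimestamp(KeyedDataPointGeneral element) {
        return element.getTimeStampMs(); // event time carried by each data point
    }
}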
