Skip to content

Commit

Permalink
Add parse and compare hooks
Browse files Browse the repository at this point in the history
This adds 4 hooks that can be set on the `JPlagOptions` class:

- A pre-parsing stage hook, which is called before all submissions are
  parsed. It is called with the list of all submissions eligible for
  the comparison, i.e. all submissions whose paths are not filtered out
  by `SubmissionSetBuilder.isExcludedEntry`.
- A post-parsing hook per submission, which is called after the
  submission has been parsed. It is called with the `Submission`
  object that has been parsed.
- A pre-comparing stage hook, which is called before the comparisons
  are started. It is called with a list of all comparison tuples.
- A post-comparing hook, which is called after each comparison. It
  is called with the `SubmissionTuple` that has just been compared.
  • Loading branch information
olmokramer committed Sep 13, 2023
1 parent 48acddb commit d838320
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 24 deletions.
3 changes: 2 additions & 1 deletion cli/src/main/java/de/jplag/cli/CLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,8 @@ public JPlagOptions buildOptionsFromArguments(ParseResult parseResult) throws Cl
JPlagOptions jPlagOptions = new JPlagOptions(loadLanguage(parseResult), this.options.minTokenMatch, submissionDirectories,
oldSubmissionDirectories, null, this.options.advanced.subdirectory, suffixes, this.options.advanced.exclusionFileName,
JPlagOptions.DEFAULT_SIMILARITY_METRIC, this.options.advanced.similarityThreshold, this.options.shownComparisons, clusteringOptions,
this.options.advanced.debug, mergingOptions);
this.options.advanced.debug, mergingOptions, JPlagOptions.DEFAULT_PRE_PARSE_HOOK, JPlagOptions.DEFAULT_PARSE_HOOK,
JPlagOptions.DEFAULT_PRE_COMPARE_HOOK, JPlagOptions.DEFAULT_COMPARE_HOOK);

String baseCodePath = this.options.baseCode;
File baseCodeDirectory = baseCodePath == null ? null : new File(baseCodePath);
Expand Down
4 changes: 4 additions & 0 deletions core/src/main/java/de/jplag/SubmissionSet.java
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,8 @@ private void parseBaseCodeSubmission(Submission baseCode) throws BasecodeExcepti
* Parse all given submissions.
*/
private void parseSubmissions(List<Submission> submissions) {
this.options.preParseHook().callback(submissions);

if (submissions.isEmpty()) {
logger.warn("No submissions to parse!");
return;
Expand Down Expand Up @@ -167,6 +169,8 @@ private void parseSubmissions(List<Submission> submissions) {
} else {
logger.error("ERROR -> Submission {} removed", currentSubmissionName);
}

this.options.parseHook().callback(submission);
}

int validSubmissions = submissions.size() - errors - tooShort;
Expand Down
115 changes: 95 additions & 20 deletions core/src/main/java/de/jplag/options/JPlagOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@

import de.jplag.JPlag;
import de.jplag.Language;
import de.jplag.Submission;
import de.jplag.clustering.ClusteringOptions;
import de.jplag.exceptions.BasecodeException;
import de.jplag.merging.MergingOptions;
import de.jplag.strategy.SubmissionTuple;
import de.jplag.util.FileUtils;

/**
Expand Down Expand Up @@ -48,7 +50,8 @@
public record JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories,
File baseCodeSubmissionDirectory, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
boolean debugParser, MergingOptions mergingOptions) {
boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook,
CompareHook compareHook) {

public static final double DEFAULT_SIMILARITY_THRESHOLD = 0;
public static final int DEFAULT_SHOWN_COMPARISONS = 100;
Expand All @@ -61,13 +64,15 @@ public record JPlagOptions(Language language, Integer minimumTokenMatch, Set<Fil

/**
 * Creates options from a language and the submission directories, using default values for every other component.
 * @param language Language to use when parsing the submissions.
 * @param submissionDirectories Value for the {@code submissionDirectories} component.
 * @param oldSubmissionDirectories Value for the {@code oldSubmissionDirectories} component.
 */
public JPlagOptions(Language language, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories) {
    // The null arguments are, in order: minimumTokenMatch, baseCodeSubmissionDirectory, subdirectoryName, fileSuffixes and
    // exclusionFileName; the literal false is debugParser. All four hooks start out as the no-op defaults.
    this(language, null, submissionDirectories, oldSubmissionDirectories, null, null, null, null, DEFAULT_SIMILARITY_METRIC,
            DEFAULT_SIMILARITY_THRESHOLD, DEFAULT_SHOWN_COMPARISONS, new ClusteringOptions(), false, new MergingOptions(),
            DEFAULT_PRE_PARSE_HOOK, DEFAULT_PARSE_HOOK, DEFAULT_PRE_COMPARE_HOOK, DEFAULT_COMPARE_HOOK);
}

public JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> submissionDirectories, Set<File> oldSubmissionDirectories,
File baseCodeSubmissionDirectory, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
boolean debugParser, MergingOptions mergingOptions) {
boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook,
CompareHook compareHook) {
this.language = language;
this.debugParser = debugParser;
this.fileSuffixes = fileSuffixes == null || fileSuffixes.isEmpty() ? null : Collections.unmodifiableList(fileSuffixes);
Expand All @@ -82,90 +87,118 @@ public JPlagOptions(Language language, Integer minimumTokenMatch, Set<File> subm
this.subdirectoryName = subdirectoryName;
this.clusteringOptions = clusteringOptions;
this.mergingOptions = mergingOptions;
this.preParseHook = preParseHook;
this.parseHook = parseHook;
this.preCompareHook = preCompareHook;
this.compareHook = compareHook;
}

public JPlagOptions withLanguageOption(Language language) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withDebugParser(boolean debugParser) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withFileSuffixes(List<String> fileSuffixes) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSimilarityThreshold(double similarityThreshold) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMaximumNumberOfComparisons(int maximumNumberOfComparisons) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSimilarityMetric(SimilarityMetric similarityMetric) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMinimumTokenMatch(Integer minimumTokenMatch) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withExclusionFileName(String exclusionFileName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSubmissionDirectories(Set<File> submissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withOldSubmissionDirectories(Set<File> oldSubmissionDirectories) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withBaseCodeSubmissionDirectory(File baseCodeSubmissionDirectory) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withSubdirectoryName(String subdirectoryName) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withClusteringOptions(ClusteringOptions clusteringOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

public JPlagOptions withMergingOptions(MergingOptions mergingOptions) {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options with a different pre-parsing hook.
 * @param preParseHook Hook to call before the submissions are parsed; {@code null} selects {@link #DEFAULT_PRE_PARSE_HOOK}.
 * @return new options; all other components are passed on to the constructor as they are.
 */
public JPlagOptions withPreParseHook(PreParseHook preParseHook) {
    // The hook is invoked unconditionally where it is used, so a stored null would only fail there with a
    // NullPointerException. Fall back to the no-op default instead.
    PreParseHook hook = preParseHook == null ? DEFAULT_PRE_PARSE_HOOK : preParseHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, hook, parseHook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options with a different per-submission parse hook.
 * @param parseHook Hook to call for each submission during parsing; {@code null} selects {@link #DEFAULT_PARSE_HOOK}.
 * @return new options; all other components are passed on to the constructor as they are.
 */
public JPlagOptions withParseHook(ParseHook parseHook) {
    // The hook is invoked unconditionally where it is used, so a stored null would only fail there with a
    // NullPointerException. Fall back to the no-op default instead.
    ParseHook hook = parseHook == null ? DEFAULT_PARSE_HOOK : parseHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, hook, preCompareHook, compareHook);
}

/**
 * Creates a copy of these options with a different pre-comparison hook.
 * @param preCompareHook Hook to call with the list of comparison tuples; {@code null} selects {@link #DEFAULT_PRE_COMPARE_HOOK}.
 * @return new options; all other components are passed on to the constructor as they are.
 */
public JPlagOptions withPreCompareHook(PreCompareHook preCompareHook) {
    // The hook is invoked unconditionally where it is used, so a stored null would only fail there with a
    // NullPointerException. Fall back to the no-op default instead.
    PreCompareHook hook = preCompareHook == null ? DEFAULT_PRE_COMPARE_HOOK : preCompareHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, hook, compareHook);
}

/**
 * Creates a copy of these options with a different per-comparison hook.
 * @param compareHook Hook to call for each compared tuple; {@code null} selects {@link #DEFAULT_COMPARE_HOOK}.
 * @return new options; all other components are passed on to the constructor as they are.
 */
public JPlagOptions withCompareHook(CompareHook compareHook) {
    // The hook is invoked unconditionally where it is used, so a stored null would only fail there with a
    // NullPointerException. Fall back to the no-op default instead.
    CompareHook hook = compareHook == null ? DEFAULT_COMPARE_HOOK : compareHook;
    return new JPlagOptions(language, minimumTokenMatch, submissionDirectories, oldSubmissionDirectories, baseCodeSubmissionDirectory,
            subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
            clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, hook);
}

public boolean hasBaseCode() {
Expand Down Expand Up @@ -225,6 +258,46 @@ private Integer normalizeMinimumTokenMatch(Integer minimumTokenMatch) {
return (minimumTokenMatch != null && minimumTokenMatch < 1) ? Integer.valueOf(1) : minimumTokenMatch;
}

/**
 * Callback that is invoked once at the start of submission parsing.
 */
@FunctionalInterface
public interface PreParseHook {
    /**
     * Called with the submissions that are about to be parsed. The call happens before the parser checks whether the list is
     * empty, so implementations must expect an empty list.
     * @param submissions Submissions handed to the parsing step.
     */
    void callback(List<Submission> submissions);
}

/** Default {@link PreParseHook}; does nothing. */
public static final PreParseHook DEFAULT_PRE_PARSE_HOOK = submissions -> {
    // intentionally empty
};

/**
 * Callback that is invoked once per submission during parsing.
 */
@FunctionalInterface
public interface ParseHook {
    /**
     * Called at the end of each iteration of the parsing loop with the submission that was just processed.
     * NOTE(review): the call sits after the branch that logs a submission as removed, so it appears to run for submissions
     * that failed to parse as well - confirm against the full parsing method.
     * @param submission Submission that was just processed.
     */
    void callback(Submission submission);
}

/** Default {@link ParseHook}; does nothing. */
public static final ParseHook DEFAULT_PARSE_HOOK = submission -> {
    // intentionally empty
};

/**
 * Callback that is invoked once the list of comparison tuples has been built.
 */
@FunctionalInterface
public interface PreCompareHook {
    /**
     * Called with the tuples that are about to be compared. The caller passes the very list it returns afterwards, so
     * implementations should treat it as read-only unless changing the set of comparisons is intended.
     * @param tuples All submission tuples that were built for comparison.
     */
    void callback(List<SubmissionTuple> tuples);
}

/** Default {@link PreCompareHook}; does nothing. */
public static final PreCompareHook DEFAULT_PRE_COMPARE_HOOK = tuples -> {
    // intentionally empty
};

/**
 * Callback that is invoked after each individual comparison.
 * <p>
 * Implementations should be safe to call from several threads at once: at least one comparison strategy compares the tuples
 * through a parallel stream.
 */
@FunctionalInterface
public interface CompareHook {
    /**
     * Called with the tuple whose comparison has just been computed, before the similarity threshold is checked.
     * @param tuple Pair of submissions that was just compared.
     */
    void callback(SubmissionTuple tuple);
}

/** Default {@link CompareHook}; does nothing. */
public static final CompareHook DEFAULT_COMPARE_HOOK = tuple -> {
    // intentionally empty
};

/**
* Creates new options to configure {@link JPlag}.
* @param language Language to use when parsing the submissions.
Expand Down Expand Up @@ -254,10 +327,12 @@ private Integer normalizeMinimumTokenMatch(Integer minimumTokenMatch) {
public JPlagOptions(Language language, Integer minimumTokenMatch, File submissionDirectory, Set<File> oldSubmissionDirectories,
        String baseCodeSubmissionName, String subdirectoryName, List<String> fileSuffixes, String exclusionFileName,
        SimilarityMetric similarityMetric, double similarityThreshold, int maximumNumberOfComparisons, ClusteringOptions clusteringOptions,
        boolean debugParser, MergingOptions mergingOptions, PreParseHook preParseHook, ParseHook parseHook, PreCompareHook preCompareHook,
        CompareHook compareHook) throws BasecodeException {
    // Adapts the single-directory, name-based arguments to the Set/File based constructor: the directory is wrapped in a
    // one-element set and the base code name is resolved through convertLegacyBaseCodeToFile. The four hooks are passed
    // through as given.
    this(language, minimumTokenMatch, Set.of(submissionDirectory), oldSubmissionDirectories,
            convertLegacyBaseCodeToFile(baseCodeSubmissionName, submissionDirectory), subdirectoryName, fileSuffixes, exclusionFileName,
            similarityMetric, similarityThreshold, maximumNumberOfComparisons, clusteringOptions, debugParser, mergingOptions, preParseHook,
            parseHook, preCompareHook, compareHook);
}

/**
Expand All @@ -280,7 +355,7 @@ public JPlagOptions withBaseCodeSubmissionName(String baseCodeSubmissionName) {
try {
return new JPlagOptions(language, minimumTokenMatch, submissionDirectory, oldSubmissionDirectories, baseCodeSubmissionName,
subdirectoryName, fileSuffixes, exclusionFileName, similarityMetric, similarityThreshold, maximumNumberOfComparisons,
clusteringOptions, debugParser, mergingOptions);
clusteringOptions, debugParser, mergingOptions, preParseHook, parseHook, preCompareHook, compareHook);
} catch (BasecodeException e) {
throw new IllegalArgumentException(e.getMessage(), e.getCause());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,16 @@ protected void compareSubmissionsToBaseCode(SubmissionSet submissionSet) {
* Compares two submissions and optionally returns the results if similarity is high enough.
*/
protected Optional<JPlagComparison> compareSubmissions(Submission first, Submission second) {
    // Pair-based entry point: wrap both submissions in a tuple and delegate to the tuple-based overload.
    SubmissionTuple pair = new SubmissionTuple(first, second);
    return compareSubmissions(pair);
}

protected Optional<JPlagComparison> compareSubmissions(SubmissionTuple tuple) {
Submission first = tuple.left();
Submission second = tuple.right();

JPlagComparison comparison = greedyStringTiling.compare(first, second);
logger.info("Comparing {}-{}: {}", first.getName(), second.getName(), comparison.similarity());
this.options.compareHook().callback(tuple);

if (options.similarityMetric().isAboveThreshold(comparison, options.similarityThreshold())) {
return Optional.of(comparison);
Expand All @@ -57,7 +65,7 @@ protected Optional<JPlagComparison> compareSubmissions(Submission first, Submiss
/**
* @return a list of all submission tuples to be processed.
*/
protected static List<SubmissionTuple> buildComparisonTuples(List<Submission> submissions) {
protected List<SubmissionTuple> buildComparisonTuples(List<Submission> submissions) {
List<SubmissionTuple> tuples = new ArrayList<>();
List<Submission> validSubmissions = submissions.stream().filter(s -> s.getTokenList() != null).toList();

Expand All @@ -70,6 +78,7 @@ protected static List<SubmissionTuple> buildComparisonTuples(List<Submission> su
}
}
}
this.options.preCompareHook().callback(tuples);
return tuples;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,7 @@ public JPlagResult compareSubmissions(SubmissionSet submissionSet) {
}

List<SubmissionTuple> tuples = buildComparisonTuples(submissionSet.getSubmissions());
List<JPlagComparison> comparisons = tuples.stream().parallel().map(tuple -> compareSubmissions(tuple.left(), tuple.right()))
.flatMap(Optional::stream).toList();
List<JPlagComparison> comparisons = tuples.stream().parallel().map(this::compareSubmissions).flatMap(Optional::stream).toList();

long durationInMillis = System.currentTimeMillis() - timeBeforeStartInMillis;
return new JPlagResult(comparisons, submissionSet, durationInMillis, options);
Expand Down

0 comments on commit d838320

Please sign in to comment.