From 96c008ab7f041c3303ce516dd8b4842b569ee898 Mon Sep 17 00:00:00 2001 From: George Tay Date: Fri, 16 Feb 2024 15:56:38 +0800 Subject: [PATCH 1/2] Enhance existing Regex code --- src/main/java/reposense/authorship/FileInfoExtractor.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/reposense/authorship/FileInfoExtractor.java b/src/main/java/reposense/authorship/FileInfoExtractor.java index 6b815690a1..9bd56172ef 100644 --- a/src/main/java/reposense/authorship/FileInfoExtractor.java +++ b/src/main/java/reposense/authorship/FileInfoExtractor.java @@ -142,13 +142,14 @@ public List getEditedFileInfos(RepoConfiguration config, String lastCo */ public Set getFiles(RepoConfiguration repoConfig, boolean isBinaryFile) { List modifiedFileList = GitDiff.getModifiedFilesList(Paths.get(repoConfig.getRepoRoot())); + Pattern tabSplitter = Pattern.compile("\t"); // Gets rid of files with invalid directory name and filters by the {@code isBinaryFile} flag return modifiedFileList.stream() .filter(file -> isBinaryFile == file.startsWith(BINARY_FILE_LINE_DIFF_RESULT)) - .map(file -> file.split("\t")[2]) + .map(file -> tabSplitter.split(file)[2]) .filter(FileUtil::isValidPathWithLogging) - .map(filteredFile -> Paths.get(filteredFile)) + .map(Paths::get) .collect(Collectors.toCollection(HashSet::new)); } @@ -160,11 +161,12 @@ private void setLinesToTrack(FileInfo fileInfo, String fileDiffResult) { String[] linesChangedChunk = fileDiffResult.split(LINE_CHUNKS_SEPARATOR); List lineInfos = fileInfo.getLines(); int fileLinePointer = 0; + Pattern newlineSplitter = Pattern.compile("\n"); // skips the header, index starts from 1 for (int sectionIndex = 1; sectionIndex < linesChangedChunk.length; sectionIndex++) { String linesChangedInSection = linesChangedChunk[sectionIndex]; - String[] linesChanged = linesChangedInSection.split("\n"); + String[] linesChanged = newlineSplitter.split(linesChangedInSection); int startingLineNumber = getStartingLineNumber(linesChanged[LINE_CHANGED_HEADER_INDEX]); // mark all untouched lines between sections as untracked From 36ae8064303cabe405ad4f00a5d01fde1ff72dc4 Mon Sep 17 00:00:00 2001 From: George Tay Date: Fri, 23 Feb 2024 14:59:14 +0800 Subject: [PATCH 2/2] Consolidate typical Regex patterns --- .../java/reposense/authorship/FileInfoAnalyzer.java | 3 ++- .../reposense/authorship/FileInfoExtractor.java | 7 +++---- src/main/java/reposense/git/GitCatFile.java | 3 ++- src/main/java/reposense/git/GitConfig.java | 8 ++++++-- src/main/java/reposense/git/GitDiff.java | 5 ++++- src/main/java/reposense/git/GitLog.java | 5 +++-- src/main/java/reposense/git/GitRemote.java | 7 +++++-- src/main/java/reposense/git/GitRevList.java | 5 ++++- src/main/java/reposense/git/GitShortlog.java | 5 +++-- src/main/java/reposense/git/GitShow.java | 3 ++- src/main/java/reposense/model/CommitHash.java | 3 ++- src/main/java/reposense/util/StringsUtil.java | 9 +++++---- src/test/java/reposense/util/TestUtil.java | 13 ++++++------- 13 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/main/java/reposense/authorship/FileInfoAnalyzer.java b/src/main/java/reposense/authorship/FileInfoAnalyzer.java index 4114b23f3b..5efd8ce7c3 100644 --- a/src/main/java/reposense/authorship/FileInfoAnalyzer.java +++ b/src/main/java/reposense/authorship/FileInfoAnalyzer.java @@ -22,6 +22,7 @@ import reposense.model.RepoConfiguration; import reposense.system.LogsManager; import reposense.util.FileUtil; +import reposense.util.StringsUtil; /** * Analyzes the target and information given in the {@link FileInfo}. @@ -149,7 +150,7 @@ private void aggregateBlameAuthorModifiedAndDateInfo(RepoConfiguration config, F blameResults = getGitBlameWithPreviousAuthorsResult(config, fileInfo.getPath()); } - String[] blameResultLines = blameResults.split("\n"); + String[] blameResultLines = StringsUtil.NEWLINE.split(blameResults); Path filePath = Paths.get(fileInfo.getPath()); LocalDateTime sinceDate = config.getSinceDate(); LocalDateTime untilDate = config.getUntilDate(); diff --git a/src/main/java/reposense/authorship/FileInfoExtractor.java b/src/main/java/reposense/authorship/FileInfoExtractor.java index 9bd56172ef..a79b9e46f0 100644 --- a/src/main/java/reposense/authorship/FileInfoExtractor.java +++ b/src/main/java/reposense/authorship/FileInfoExtractor.java @@ -27,6 +27,7 @@ import reposense.model.RepoConfiguration; import reposense.system.LogsManager; import reposense.util.FileUtil; +import reposense.util.StringsUtil; /** * Extracts out all the relevant {@code FileInfo} from the repository. @@ -142,12 +143,11 @@ public List getEditedFileInfos(RepoConfiguration config, String lastCo */ public Set getFiles(RepoConfiguration repoConfig, boolean isBinaryFile) { List modifiedFileList = GitDiff.getModifiedFilesList(Paths.get(repoConfig.getRepoRoot())); - Pattern tabSplitter = Pattern.compile("\t"); // Gets rid of files with invalid directory name and filters by the {@code isBinaryFile} flag return modifiedFileList.stream() .filter(file -> isBinaryFile == file.startsWith(BINARY_FILE_LINE_DIFF_RESULT)) - .map(file -> tabSplitter.split(file)[2]) + .map(file -> StringsUtil.TAB.split(file)[2]) .filter(FileUtil::isValidPathWithLogging) .map(Paths::get) .collect(Collectors.toCollection(HashSet::new)); @@ -161,12 +161,11 @@ private void setLinesToTrack(FileInfo fileInfo, String fileDiffResult) { String[] linesChangedChunk = fileDiffResult.split(LINE_CHUNKS_SEPARATOR); List lineInfos = fileInfo.getLines(); int fileLinePointer = 0; - Pattern newlineSplitter = Pattern.compile("\n"); // skips the header, index starts from 1 for (int sectionIndex = 1; sectionIndex < linesChangedChunk.length; sectionIndex++) { String linesChangedInSection = linesChangedChunk[sectionIndex]; - String[] linesChanged = newlineSplitter.split(linesChangedInSection); + String[] linesChanged = StringsUtil.NEWLINE.split(linesChangedInSection); int startingLineNumber = getStartingLineNumber(linesChanged[LINE_CHANGED_HEADER_INDEX]); // mark all untouched lines between sections as untracked diff --git a/src/main/java/reposense/git/GitCatFile.java b/src/main/java/reposense/git/GitCatFile.java index f47bbb5a82..ecd62d1d62 100644 --- a/src/main/java/reposense/git/GitCatFile.java +++ b/src/main/java/reposense/git/GitCatFile.java @@ -10,6 +10,7 @@ import reposense.git.exception.CommitNotFoundException; import reposense.system.LogsManager; +import reposense.util.StringsUtil; /** * Contains git cat file related functionalities. @@ -31,7 +32,7 @@ public static List getParentCommits(String root, String commitHash) thro try { String output = runCommand(rootPath, catFileCommand); List parentCommits = new ArrayList<>(); - for (String line : output.split("\n")) { + for (String line : StringsUtil.NEWLINE.split(output)) { if (line.startsWith("parent")) { parentCommits.add(line.substring(7).trim()); } diff --git a/src/main/java/reposense/git/GitConfig.java b/src/main/java/reposense/git/GitConfig.java index aa1e1d4d39..021344938c 100644 --- a/src/main/java/reposense/git/GitConfig.java +++ b/src/main/java/reposense/git/GitConfig.java @@ -6,10 +6,12 @@ import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; +import java.util.regex.Pattern; import java.util.stream.Collectors; import reposense.system.CommandRunner; import reposense.system.LogsManager; +import reposense.util.StringsUtil; /** * Contains git config related functionalities. @@ -36,10 +38,12 @@ public class GitConfig { * @return a list of string arrays where 0-index is key and 1-index is value. */ public static List getGlobalGitLfsConfig() { + Pattern equals = Pattern.compile("="); + try { String gitConfig = getGitGlobalConfig(); - return Arrays.stream(gitConfig.split("\n")) - .map(line -> line.split("=")) + return Arrays.stream(StringsUtil.NEWLINE.split(gitConfig)) + .map(equals::split) .filter(line -> line[0].equals(FILTER_LFS_SMUDGE_KEY) || line[0].equals((FILTER_LFS_PROCESS_KEY))) .collect(Collectors.toList()); } catch (RuntimeException re) { diff --git a/src/main/java/reposense/git/GitDiff.java b/src/main/java/reposense/git/GitDiff.java index 2e070aadd1..20119281dd 100644 --- a/src/main/java/reposense/git/GitDiff.java +++ b/src/main/java/reposense/git/GitDiff.java @@ -7,6 +7,9 @@ import java.util.Arrays; import java.util.List; +import reposense.util.StringsUtil; + + /** * Contains git diff related functionalities. * Git diff is responsible for obtaining the changes between commits, commit and working tree, etc. @@ -32,6 +35,6 @@ public static List getModifiedFilesList(Path repoRoot) { String diffCommand = String.format("git diff --ignore-submodules=all --numstat %s %s", EMPTY_TREE_HASH, CHECKED_OUT_COMMIT_REFERENCE); String diffResult = runCommand(repoRoot.toAbsolutePath(), diffCommand); - return Arrays.asList(diffResult.split("\n")); + return Arrays.asList(StringsUtil.NEWLINE.split(diffResult)); } } diff --git a/src/main/java/reposense/git/GitLog.java b/src/main/java/reposense/git/GitLog.java index f9540ec143..1da51148b2 100644 --- a/src/main/java/reposense/git/GitLog.java +++ b/src/main/java/reposense/git/GitLog.java @@ -11,6 +11,7 @@ import reposense.model.Author; import reposense.model.RepoConfiguration; +import reposense.util.StringsUtil; /** * Contains git log related functionalities. @@ -69,8 +70,8 @@ public static List getFileAuthors(RepoConfiguration config, String fil command += " " + addQuotesForFilePath(filePath); String result = runCommand(rootPath, command); - return Arrays.stream(result.split("\n")) - .map(authorAndEmailLine -> authorAndEmailLine.split("\t")) + return Arrays.stream(StringsUtil.NEWLINE.split(result)) + .map(StringsUtil.TAB::split) .map(authorAndEmailArray -> authorAndEmailArray.length == 1 ? new String[] {authorAndEmailArray[0], DEFAULT_EMAIL_IF_MISSING} : authorAndEmailArray) diff --git a/src/main/java/reposense/git/GitRemote.java b/src/main/java/reposense/git/GitRemote.java index 1fd116c923..0c72cce1e4 100644 --- a/src/main/java/reposense/git/GitRemote.java +++ b/src/main/java/reposense/git/GitRemote.java @@ -6,9 +6,11 @@ import java.util.Map; import java.util.Optional; import java.util.logging.Logger; +import java.util.regex.Pattern; import reposense.system.CommandRunner; import reposense.system.LogsManager; +import reposense.util.StringsUtil; /** * Contains git remote related functionality. @@ -27,6 +29,7 @@ public class GitRemote { * @return Map of keys of the form REMOTE_NAME(fetch) or REMOTE_NAME(push) to their corresponding remote URLs. */ public static Map getRemotes(String repoRoot) { + Pattern anyTabPattern = Pattern.compile("[ \\t]+"); Map remotes = new HashMap<>(); String result; try { @@ -36,8 +39,8 @@ public static Map getRemotes(String repoRoot) { return remotes; } - Arrays.stream(result.split("\n")) - .map(s -> s.split("[ \\t]+")) + Arrays.stream(StringsUtil.NEWLINE.split(result)) + .map(anyTabPattern::split) .forEach(l -> { if (l.length == 3) { // l[0]: remote name diff --git a/src/main/java/reposense/git/GitRevList.java b/src/main/java/reposense/git/GitRevList.java index d5af01637a..a1820000e7 100644 --- a/src/main/java/reposense/git/GitRevList.java +++ b/src/main/java/reposense/git/GitRevList.java @@ -10,6 +10,9 @@ import java.util.Arrays; import java.util.List; +import reposense.util.StringsUtil; + + /** * Contains git rev list related functionalities. * Git rev list is responsible for showing commit objects in reverse chronological order. @@ -106,7 +109,7 @@ public static List getRootCommits(String root) { String revListCommand = "git rev-list --max-parents=0 HEAD"; Path rootPath = Paths.get(root); String output = runCommand(rootPath, revListCommand); - return Arrays.asList(output.split("\n")); + return Arrays.asList(StringsUtil.NEWLINE.split(output)); } /** diff --git a/src/main/java/reposense/git/GitShortlog.java b/src/main/java/reposense/git/GitShortlog.java index 920eb84177..f4572a317c 100644 --- a/src/main/java/reposense/git/GitShortlog.java +++ b/src/main/java/reposense/git/GitShortlog.java @@ -13,6 +13,7 @@ import reposense.model.Author; import reposense.model.RepoConfiguration; +import reposense.util.StringsUtil; /** * Contains git shortlog related functionalities. @@ -32,9 +33,9 @@ public static List getAuthors(RepoConfiguration config) { return Collections.emptyList(); } - String[] lines = summary.split("\n"); + String[] lines = StringsUtil.NEWLINE.split(summary); return Arrays.stream(lines) - .map(line -> new Author(line.split("\t")[1])) + .map(line -> new Author(StringsUtil.TAB.split(line)[1])) .collect(Collectors.toList()); } diff --git a/src/main/java/reposense/git/GitShow.java b/src/main/java/reposense/git/GitShow.java index a4881ea97f..cab4b6723c 100644 --- a/src/main/java/reposense/git/GitShow.java +++ b/src/main/java/reposense/git/GitShow.java @@ -15,6 +15,7 @@ import reposense.git.exception.CommitNotFoundException; import reposense.model.CommitHash; import reposense.system.LogsManager; +import reposense.util.StringsUtil; /** * Contains git show related functionalities. @@ -36,7 +37,7 @@ public static CommitHash getExpandedCommitHash(String root, String shortCommitHa try { String output = runCommand(rootPath, showCommand); - List commitHashes = Arrays.stream(output.split("\n")) + List commitHashes = Arrays.stream(StringsUtil.NEWLINE.split(output)) .map(CommitHash::new).collect(Collectors.toList()); if (commitHashes.size() > 1) { logger.warning(String.format("%s can be expanded to %d different commits, " diff --git a/src/main/java/reposense/model/CommitHash.java b/src/main/java/reposense/model/CommitHash.java index f5035f2177..1a0fbbc48e 100644 --- a/src/main/java/reposense/model/CommitHash.java +++ b/src/main/java/reposense/model/CommitHash.java @@ -6,6 +6,7 @@ import java.util.stream.Stream; import reposense.git.GitRevList; +import reposense.util.StringsUtil; /** * Represents a git commit hash in {@code RepoConfiguration}. @@ -77,7 +78,7 @@ public static Stream getHashes(String root, String branchName, Commi String[] startAndEnd = entry.toString().split("\\.\\."); String revList = GitRevList.getCommitHashInRange(root, branchName, startAndEnd[0], startAndEnd[1]); - return Arrays.stream(revList.split("\n")) + return Arrays.stream(StringsUtil.NEWLINE.split(revList)) .map(CommitHash::new); } diff --git a/src/main/java/reposense/util/StringsUtil.java b/src/main/java/reposense/util/StringsUtil.java index 790c394692..a5e0480dac 100644 --- a/src/main/java/reposense/util/StringsUtil.java +++ b/src/main/java/reposense/util/StringsUtil.java @@ -6,18 +6,19 @@ * Contains strings related utilities. */ public class StringsUtil { - + public static final Pattern NEWLINE = Pattern.compile("\n"); + public static final Pattern TAB = Pattern.compile("\t"); + public static final Pattern NUMERIC = Pattern.compile("^\\d+$"); private static final Pattern SPECIAL_SYMBOLS = Pattern.compile("[@;:&/\\\\!<>{}%#\"\\-='()\\[\\].+*?^$|]"); /** * Filters the {@code text}, returning only the lines that matches the given {@code regex}. */ public static String filterText(String text, String regex) { - String[] split = text.split("\n"); StringBuilder sb = new StringBuilder(); Pattern regexPattern = Pattern.compile(regex); - for (String line: split) { + for (String line: NEWLINE.split(text)) { if (regexPattern.matcher(line).matches()) { sb.append(line).append("\n"); } @@ -91,6 +92,6 @@ public static String removeTrailingBackslash(String string) { * Returns true iff {@code string} is purely numeric. */ public static boolean isNumeric(String string) { - return Pattern.compile("^\\d+$").matcher(string).matches(); + return NUMERIC.matcher(string).matches(); } } diff --git a/src/test/java/reposense/util/TestUtil.java b/src/test/java/reposense/util/TestUtil.java index 73213e5c58..ba5a5b7ec4 100644 --- a/src/test/java/reposense/util/TestUtil.java +++ b/src/test/java/reposense/util/TestUtil.java @@ -31,7 +31,6 @@ public class TestUtil { + ">> %s\n"; private static final String MESSAGE_LINES_LENGTH_DIFFERENT = "The files' lines count do not match."; - private static final String TAB_SPLITTER = "\t"; private static final String MOVED_FILE_INDICATION = "=> "; private static final int STAT_FILE_PATH_INDEX = 2; @@ -54,10 +53,10 @@ public static boolean compareFileContents(Path expected, Path actual, int maxTra System.out.println(String.format(MESSAGE_COMPARING_FILES, expected, actual)); - String[] expectedContent = new String(Files.readAllBytes(expected)) - .replace("\r", "").split("\n"); - String[] actualContent = new String(Files.readAllBytes(actual)) - .replace("\r", "").split("\n"); + String[] expectedContent = StringsUtil.NEWLINE.split(new String(Files.readAllBytes(expected)) + .replace("\r", "")); + String[] actualContent = StringsUtil.NEWLINE.split(new String(Files.readAllBytes(actual)) + .replace("\r", "")); for (int i = 0; i < Math.min(expectedContent.length, actualContent.length); i++) { if (!expectedContent[i].equals(actualContent[i])) { @@ -197,7 +196,7 @@ public static boolean compareNumberFilesChanged(int expectedNumberFilesChanged, */ private static Set getFilesChangedInCommit(String rawCommitInfo) { Set filesChanged = new HashSet<>(); - String[] commitInfo = rawCommitInfo.replaceAll("\n+$", "").split("\n"); + String[] commitInfo = StringsUtil.NEWLINE.split(rawCommitInfo.replaceAll("\n+$", "")); int fileChangedNum = Integer.parseInt(commitInfo[commitInfo.length - 1].trim().split(" ")[0]); for (int fileNum = 0; fileNum < fileChangedNum; fileNum++) { filesChanged.add(getFileChanged(commitInfo[commitInfo.length - 2 - fileNum])); @@ -209,7 +208,7 @@ private static Set getFilesChangedInCommit(String rawCommitInfo) { * Returns the file changed given a {@code rawFileChangedString}. */ private static String getFileChanged(String rawFileChangedString) { - String fileChanged = rawFileChangedString.split(TAB_SPLITTER)[STAT_FILE_PATH_INDEX].trim(); + String fileChanged = StringsUtil.TAB.split(rawFileChangedString)[STAT_FILE_PATH_INDEX].trim(); if (fileChanged.contains(MOVED_FILE_INDICATION)) { fileChanged = fileChanged.substring(fileChanged.indexOf(MOVED_FILE_INDICATION) + MOVED_FILE_INDICATION.length());