Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixes for #21, #22, #27 and #28 #1

Merged
merged 4 commits into from
Jul 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/main/java/fr/noop/subtitle/srt/SrtParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@

import fr.noop.subtitle.model.SubtitleParser;
import fr.noop.subtitle.model.SubtitleParsingException;
import fr.noop.subtitle.util.StringUtils;
import fr.noop.subtitle.util.SubtitlePlainText;
import fr.noop.subtitle.util.SubtitleTextLine;
import fr.noop.subtitle.util.SubtitleTimeCode;
Expand Down Expand Up @@ -61,13 +62,18 @@ public SrtObject parse(InputStream is, boolean strict) throws IOException, Subti
if (textLine.isEmpty()) {
continue;
}

// issue #22
// Remove BOM
textLine = StringUtils.removeBOM(textLine);


// New cue
cue = new SrtCue();

// First textLine is the cue number
try {
Integer.parseInt(textLine);
Integer.valueOf(textLine);
} catch (NumberFormatException e) {
throw new SubtitleParsingException(String.format(
"Unable to parse cue number: %s",
Expand Down
69 changes: 53 additions & 16 deletions src/main/java/fr/noop/subtitle/vtt/VttParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ private enum CursorStatus {
EMPTY_LINE,
CUE_ID,
CUE_TIMECODE,
NOTE,
CUE_TEXT;
}

Expand All @@ -43,6 +44,10 @@ private enum TagStatus {
}

private String charset; // Charset of the input files

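// for #21: matches a timecode line whose hours part is optional on either side
// (e.g. "01:21.456 --> 01:23.417"), optionally followed by trailing text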
private final String TIMECODE_LINE_REGEX = "^(\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d --> (\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d ?.*";


public VttParser(String charset) {
this.charset = charset;
Expand All @@ -55,13 +60,14 @@ public VttObject parse(InputStream is) throws IOException, SubtitleParsingExcept

@Override
public VttObject parse(InputStream is, boolean strict) throws IOException, SubtitleParsingException {
// Create srt object
// Create the VTT object
VttObject vttObject = new VttObject();

// Read each lines
// Read each line
BufferedReader br = new BufferedReader(new InputStreamReader(is, this.charset));
String textLine = "";
CursorStatus cursorStatus = CursorStatus.NONE;
CursorStatus memorizedCursorStatus = CursorStatus.NONE;
VttCue cue = null;
String cueText = ""; // Text of the cue

Expand All @@ -74,46 +80,73 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti
}

// All Vtt files start with WEBVTT
if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) {
//if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) {
// changed for #27
if (cursorStatus == CursorStatus.NONE && textLine.startsWith("WEBVTT")) {
cursorStatus = CursorStatus.SIGNATURE;
continue;
}

if (textLine.startsWith("NOTE")){
memorizedCursorStatus = cursorStatus;
cursorStatus = CursorStatus.NOTE;
continue;
}
if (cursorStatus == CursorStatus.NOTE){
if (textLine.isEmpty()) {
// NOTE section is over
cursorStatus = memorizedCursorStatus;
}
// Skip every line of the NOTE block, including the empty line that ends it
continue;

}




if (cursorStatus == CursorStatus.SIGNATURE ||
cursorStatus == CursorStatus.EMPTY_LINE) {
if (cursorStatus == CursorStatus.SIGNATURE || cursorStatus == CursorStatus.EMPTY_LINE) {
if (textLine.isEmpty()) {
continue;
}

// New cue
cue = new VttCue();
cursorStatus = CursorStatus.CUE_ID;

if (
textLine.length() < 16 ||
!textLine.substring(13, 16).equals("-->")
// changed for issue #21
//!textLine.substring(13, 16).equals("-->")
!(textLine.matches(TIMECODE_LINE_REGEX))
) {
// First textLine is the cue number
cue.setId(textLine);
continue;
}

// There is no cue number: this line is already the timecode line,
// so fall through to the timecode handling below
}


// Second textLine defines the start and end time codes
// 00:01:21.456 --> 00:01:23.417
if (cursorStatus == CursorStatus.CUE_ID) {
if (textLine.length() < 29 ||
!textLine.substring(13, 16).equals("-->")
if (//textLine.length() < 29 ||
// changed for issue #21
//!textLine.substring(13, 16).equals("-->")
!(textLine.matches(TIMECODE_LINE_REGEX))
) {
throw new SubtitleParsingException(String.format(
"Timecode textLine is badly formated: %s", textLine));
}

cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12)));
cue.setEndTime(this.parseTimeCode(textLine.substring(17)));
// changed for issue #21
//cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12)));
//cue.setEndTime(this.parseTimeCode(textLine.substring(17)));
int arrowIndex = textLine.indexOf("-->");
cue.setStartTime(this.parseTimeCode(textLine.substring(0, arrowIndex).trim()));
cue.setEndTime(this.parseTimeCode(textLine.substring(arrowIndex + 3).trim()));
cursorStatus = CursorStatus.CUE_TIMECODE;
continue;
}
Expand Down Expand Up @@ -305,10 +338,14 @@ private List<SubtitleLine> parseCueText(String cueText) {

private SubtitleTimeCode parseTimeCode(String timeCodeString) throws SubtitleParsingException {
try {
int hour = Integer.parseInt(timeCodeString.substring(0, 2));
int minute = Integer.parseInt(timeCodeString.substring(3, 5));
int second = Integer.parseInt(timeCodeString.substring(6, 8));
int millisecond = Integer.parseInt(timeCodeString.substring(9, 12));
String adjustedTimeCodeString = timeCodeString;
if (timeCodeString.length()<10){
adjustedTimeCodeString = "00:" + timeCodeString;
}
int hour = Integer.parseInt(adjustedTimeCodeString.substring(0, 2));
int minute = Integer.parseInt(adjustedTimeCodeString.substring(3, 5));
int second = Integer.parseInt(adjustedTimeCodeString.substring(6, 8));
int millisecond = Integer.parseInt(adjustedTimeCodeString.substring(9, 12));
return new SubtitleTimeCode(hour, minute, second, millisecond);
} catch (NumberFormatException e) {
throw new SubtitleParsingException(String.format(
Expand Down