Skip to content

Commit

Permalink
Merge pull request #1 from berndmoos/master
Browse files (browse the repository at this point in the history)
  • Loading branch information
aparamythis authored Jul 18, 2024
2 parents 04fdf39 + 8c33805 commit 678fd49
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 17 deletions.
8 changes: 7 additions & 1 deletion src/main/java/fr/noop/subtitle/srt/SrtParser.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,6 +17,7 @@

import fr.noop.subtitle.model.SubtitleParser;
import fr.noop.subtitle.model.SubtitleParsingException;
import fr.noop.subtitle.util.StringUtils;
import fr.noop.subtitle.util.SubtitlePlainText;
import fr.noop.subtitle.util.SubtitleTextLine;
import fr.noop.subtitle.util.SubtitleTimeCode;
Expand Down Expand Up @@ -61,13 +62,18 @@ public SrtObject parse(InputStream is, boolean strict) throws IOException, Subti
if (textLine.isEmpty()) {
continue;
}

// issue #22
// Remove BOM
textLine = StringUtils.removeBOM(textLine);


// New cue
cue = new SrtCue();

// First textLine is the cue number
try {
Integer.parseInt(textLine);
Integer.valueOf(textLine);
} catch (NumberFormatException e) {
throw new SubtitleParsingException(String.format(
"Unable to parse cue number: %s",
Expand Down
69 changes: 53 additions & 16 deletions src/main/java/fr/noop/subtitle/vtt/VttParser.java
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,6 +33,7 @@ private enum CursorStatus {
EMPTY_LINE,
CUE_ID,
CUE_TIMECODE,
NOTE,
CUE_TEXT;
}

Expand All @@ -43,6 +44,10 @@ private enum TagStatus {
}

private String charset; // Charset of the input files

// for #21
private final String TIMECODE_LINE_REGEX = "^(\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d --> (\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d ?.*";


public VttParser(String charset) {
this.charset = charset;
Expand All @@ -55,13 +60,14 @@ public VttObject parse(InputStream is) throws IOException, SubtitleParsingExcept

@Override
public VttObject parse(InputStream is, boolean strict) throws IOException, SubtitleParsingException {
// Create srt object
// Create vttObject object
VttObject vttObject = new VttObject();

// Read each lines
// Read each line
BufferedReader br = new BufferedReader(new InputStreamReader(is, this.charset));
String textLine = "";
CursorStatus cursorStatus = CursorStatus.NONE;
CursorStatus memorizedCursorStatus = CursorStatus.NONE;
VttCue cue = null;
String cueText = ""; // Text of the cue

Expand All @@ -74,46 +80,73 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti
}

// All Vtt files start with WEBVTT
if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) {
//if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) {
// changed for #27
if (cursorStatus == CursorStatus.NONE && textLine.startsWith("WEBVTT")) {
cursorStatus = CursorStatus.SIGNATURE;
continue;
}

if (textLine.startsWith("NOTE")){
memorizedCursorStatus = cursorStatus;
cursorStatus = CursorStatus.NOTE;
continue;
}
if (cursorStatus == CursorStatus.NOTE){
if (textLine.isEmpty()) {
// NOTE section is over
cursorStatus = memorizedCursorStatus;
}
// do nothing in any case
continue;

}




if (cursorStatus == CursorStatus.SIGNATURE ||
cursorStatus == CursorStatus.EMPTY_LINE) {
if (cursorStatus == CursorStatus.SIGNATURE || cursorStatus == CursorStatus.EMPTY_LINE) {
if (textLine.isEmpty()) {
continue;
}

// New cue
cue = new VttCue();
cursorStatus = CursorStatus.CUE_ID;

if (
textLine.length() < 16 ||
!textLine.substring(13, 16).equals("-->")
// changed for issue #21
//!textLine.substring(13, 16).equals("-->")
!(textLine.matches(TIMECODE_LINE_REGEX))
) {
// First textLine is the cue number
cue.setId(textLine);
continue;
}

// There is no cue number
}


// Second textLine defines the start and end time codes
// 00:01:21.456 --> 00:01:23.417
if (cursorStatus == CursorStatus.CUE_ID) {
if (textLine.length() < 29 ||
!textLine.substring(13, 16).equals("-->")
if (//textLine.length() < 29 ||
// changed for issue #21
//!textLine.substring(13, 16).equals("-->")
!(textLine.matches(TIMECODE_LINE_REGEX))
) {
throw new SubtitleParsingException(String.format(
"Timecode textLine is badly formated: %s", textLine));
}

cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12)));
cue.setEndTime(this.parseTimeCode(textLine.substring(17)));
// changed for issue #21
//cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12)));
//cue.setEndTime(this.parseTimeCode(textLine.substring(17)));
int arrowIndex = textLine.indexOf("-->");
cue.setStartTime(this.parseTimeCode(textLine.substring(0, arrowIndex).trim()));
cue.setEndTime(this.parseTimeCode(textLine.substring(arrowIndex + 3).trim()));
cursorStatus = CursorStatus.CUE_TIMECODE;
continue;
}
Expand Down Expand Up @@ -305,10 +338,14 @@ private List<SubtitleLine> parseCueText(String cueText) {

private SubtitleTimeCode parseTimeCode(String timeCodeString) throws SubtitleParsingException {
try {
int hour = Integer.parseInt(timeCodeString.substring(0, 2));
int minute = Integer.parseInt(timeCodeString.substring(3, 5));
int second = Integer.parseInt(timeCodeString.substring(6, 8));
int millisecond = Integer.parseInt(timeCodeString.substring(9, 12));
String adjustedTimeCodeString = timeCodeString;
if (timeCodeString.length()<10){
adjustedTimeCodeString = "00:" + timeCodeString;
}
int hour = Integer.parseInt(adjustedTimeCodeString.substring(0, 2));
int minute = Integer.parseInt(adjustedTimeCodeString.substring(3, 5));
int second = Integer.parseInt(adjustedTimeCodeString.substring(6, 8));
int millisecond = Integer.parseInt(adjustedTimeCodeString.substring(9, 12));
return new SubtitleTimeCode(hour, minute, second, millisecond);
} catch (NumberFormatException e) {
throw new SubtitleParsingException(String.format(
Expand Down

0 comments on commit 678fd49

Please sign in to comment.