From 2e4a3a3cf2d36f6181df146812b0b5be8c6d3c11 Mon Sep 17 00:00:00 2001 From: berndmoos Date: Sat, 11 May 2024 15:26:51 +0200 Subject: [PATCH 1/4] Fixes for #21 and #22 --- .../java/fr/noop/subtitle/srt/SrtParser.java | 8 ++++- .../java/fr/noop/subtitle/vtt/VttParser.java | 30 ++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/src/main/java/fr/noop/subtitle/srt/SrtParser.java b/src/main/java/fr/noop/subtitle/srt/SrtParser.java index 4b56255..fcee95f 100644 --- a/src/main/java/fr/noop/subtitle/srt/SrtParser.java +++ b/src/main/java/fr/noop/subtitle/srt/SrtParser.java @@ -17,6 +17,7 @@ import fr.noop.subtitle.model.SubtitleParser; import fr.noop.subtitle.model.SubtitleParsingException; +import fr.noop.subtitle.util.StringUtils; import fr.noop.subtitle.util.SubtitlePlainText; import fr.noop.subtitle.util.SubtitleTextLine; import fr.noop.subtitle.util.SubtitleTimeCode; @@ -61,13 +62,18 @@ public SrtObject parse(InputStream is, boolean strict) throws IOException, Subti if (textLine.isEmpty()) { continue; } + + // issue #22 + // Remove BOM + textLine = StringUtils.removeBOM(textLine); + // New cue cue = new SrtCue(); // First textLine is the cue number try { - Integer.parseInt(textLine); + Integer.valueOf(textLine); } catch (NumberFormatException e) { throw new SubtitleParsingException(String.format( "Unable to parse cue number: %s", diff --git a/src/main/java/fr/noop/subtitle/vtt/VttParser.java b/src/main/java/fr/noop/subtitle/vtt/VttParser.java index 338dbf8..347985e 100644 --- a/src/main/java/fr/noop/subtitle/vtt/VttParser.java +++ b/src/main/java/fr/noop/subtitle/vtt/VttParser.java @@ -43,6 +43,10 @@ private enum TagStatus { } private String charset; // Charset of the input files + + // for #21 + private final String TIMECODE_LINE_REGEX = "^(\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d --> (\\d\\d:)?(\\d\\d):(\\d\\d)\\.\\d\\d\\d ?.*"; + public VttParser(String charset) { this.charset = charset; @@ -88,6 +92,18 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti // New cue cue = new VttCue(); cursorStatus = CursorStatus.CUE_ID; + + if ( + textLine.length() < 16 || + // changed for issue #21 + //!textLine.substring(13, 16).equals("-->") + !(textLine.matches(TIMECODE_LINE_REGEX)) + ) { + // First textLine is the cue number + cue.setId(textLine); + continue; + } + if ( textLine.length() < 16 || @@ -105,15 +121,21 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti // Second textLine defines the start and end time codes // 00:01:21.456 --> 00:01:23.417 if (cursorStatus == CursorStatus.CUE_ID) { - if (textLine.length() < 29 || - !textLine.substring(13, 16).equals("-->") + if (//textLine.length() < 29 || + // changed for issue #21 + //!textLine.substring(13, 16).equals("-->") + !(textLine.matches(TIMECODE_LINE_REGEX)) ) { throw new SubtitleParsingException(String.format( "Timecode textLine is badly formated: %s", textLine)); } - cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12))); - cue.setEndTime(this.parseTimeCode(textLine.substring(17))); + // changed for issue #21 + //cue.setStartTime(this.parseTimeCode(textLine.substring(0, 12))); + //cue.setEndTime(this.parseTimeCode(textLine.substring(17))); + int arrowIndex = textLine.indexOf("-->"); + cue.setStartTime(this.parseTimeCode(textLine.substring(0, arrowIndex).trim())); + cue.setEndTime(this.parseTimeCode(textLine.substring(arrowIndex + 3).trim())); cursorStatus = CursorStatus.CUE_TIMECODE; continue; } From dffa688f96e2105cbe79e1d9345d9e350b1ea5a9 Mon Sep 17 00:00:00 2001 From: berndmoos Date: Sat, 11 May 2024 15:31:14 +0200 Subject: [PATCH 2/4] Fix for #27 --- src/main/java/fr/noop/subtitle/vtt/VttParser.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/main/java/fr/noop/subtitle/vtt/VttParser.java b/src/main/java/fr/noop/subtitle/vtt/VttParser.java index 347985e..0584542 100644 --- a/src/main/java/fr/noop/subtitle/vtt/VttParser.java +++ b/src/main/java/fr/noop/subtitle/vtt/VttParser.java @@ -78,7 +78,9 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti } // All Vtt files start with WEBVTT - if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) { + //if (cursorStatus == CursorStatus.NONE && textLine.equals("WEBVTT")) { + // changed for #27 + if (cursorStatus == CursorStatus.NONE && textLine.startsWith("WEBVTT")) { cursorStatus = CursorStatus.SIGNATURE; continue; } From 0bce31706d06916a435fbeff230a2db602b081a4 Mon Sep 17 00:00:00 2001 From: berndmoos Date: Sat, 11 May 2024 16:43:21 +0200 Subject: [PATCH 3/4] Fix for #28 --- .../java/fr/noop/subtitle/vtt/VttParser.java | 37 ++++++++++++------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/src/main/java/fr/noop/subtitle/vtt/VttParser.java b/src/main/java/fr/noop/subtitle/vtt/VttParser.java index 0584542..57b4d54 100644 --- a/src/main/java/fr/noop/subtitle/vtt/VttParser.java +++ b/src/main/java/fr/noop/subtitle/vtt/VttParser.java @@ -33,6 +33,7 @@ private enum CursorStatus { EMPTY_LINE, CUE_ID, CUE_TIMECODE, + NOTE, CUE_TEXT; } @@ -59,13 +60,14 @@ public VttObject parse(InputStream is) throws IOException, SubtitleParsingExcept @Override public VttObject parse(InputStream is, boolean strict) throws IOException, SubtitleParsingException { - // Create srt object + // Create vttObject object VttObject vttObject = new VttObject(); - // Read each lines + // Read each line BufferedReader br = new BufferedReader(new InputStreamReader(is, this.charset)); String textLine = ""; CursorStatus cursorStatus = CursorStatus.NONE; + CursorStatus memorizedCursorStatus = CursorStatus.NONE; VttCue cue = null; String cueText = ""; // Text of the cue @@ -84,9 +86,26 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti cursorStatus = CursorStatus.SIGNATURE; continue; } + + if (textLine.startsWith("NOTE")){ + memorizedCursorStatus = cursorStatus; + cursorStatus = CursorStatus.NOTE; + continue; + } + if (cursorStatus == CursorStatus.NOTE){ + if (textLine.isEmpty()) { + // NOTE section is over + cursorStatus = memorizedCursorStatus; + } + // do nothing in any case + continue; + + } + + + - if (cursorStatus == CursorStatus.SIGNATURE || - cursorStatus == CursorStatus.EMPTY_LINE) { + if (cursorStatus == CursorStatus.SIGNATURE || cursorStatus == CursorStatus.EMPTY_LINE) { if (textLine.isEmpty()) { continue; } @@ -106,16 +125,6 @@ public VttObject parse(InputStream is, boolean strict) throws IOException, Subti continue; } - - if ( - textLine.length() < 16 || - !textLine.substring(13, 16).equals("-->") - ) { - // First textLine is the cue number - cue.setId(textLine); - continue; - } - // There is no cue number } From 8c33805d3727bde6982cc33c0e6c2de5a8c66c3e Mon Sep 17 00:00:00 2001 From: berndmoos Date: Sat, 11 May 2024 16:50:42 +0200 Subject: [PATCH 4/4] Forgot a bit from fix for #21 --- src/main/java/fr/noop/subtitle/vtt/VttParser.java | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/main/java/fr/noop/subtitle/vtt/VttParser.java b/src/main/java/fr/noop/subtitle/vtt/VttParser.java index 57b4d54..27d273f 100644 --- a/src/main/java/fr/noop/subtitle/vtt/VttParser.java +++ b/src/main/java/fr/noop/subtitle/vtt/VttParser.java @@ -338,10 +338,14 @@ private List parseCueText(String cueText) { private SubtitleTimeCode parseTimeCode(String timeCodeString) throws SubtitleParsingException { try { - int hour = Integer.parseInt(timeCodeString.substring(0, 2)); - int minute = Integer.parseInt(timeCodeString.substring(3, 5)); - int second = Integer.parseInt(timeCodeString.substring(6, 8)); - int millisecond = Integer.parseInt(timeCodeString.substring(9, 12)); + String adjustedTimeCodeString = timeCodeString; + if (timeCodeString.length()<10){ + adjustedTimeCodeString = "00:" + timeCodeString; + } + int hour = Integer.parseInt(adjustedTimeCodeString.substring(0, 2)); + int minute = Integer.parseInt(adjustedTimeCodeString.substring(3, 5)); + int second = Integer.parseInt(adjustedTimeCodeString.substring(6, 8)); + int millisecond = Integer.parseInt(adjustedTimeCodeString.substring(9, 12)); return new SubtitleTimeCode(hour, minute, second, millisecond); } catch (NumberFormatException e) { throw new SubtitleParsingException(String.format(