diff --git a/src/subtitles/RealTextParser.cpp b/src/subtitles/RealTextParser.cpp index c89c9074c..2377fa65c 100644 --- a/src/subtitles/RealTextParser.cpp +++ b/src/subtitles/RealTextParser.cpp @@ -1,635 +1,597 @@ -#include "StdAfx.h" +/* + * (C) 2008-2014 see Authors.txt + * + * This file is part of MPC-HC. + * + * MPC-HC is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * MPC-HC is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + +#include "stdafx.h" #include "RealTextParser.h" -CRealTextParser::CRealTextParser(void): - m_bIgnoreFont(false), - m_bIgnoreFontSize(false), - m_bIgnoreFontColor(false), - m_bIgnoreFontWeight(false), - m_bIgnoreFontFace(false), - m_iMinFontSize(14), - m_iMaxFontSize(25), - m_iDefaultSubtitleDurationInMillisecs(4000), - m_bTryToIgnoreErrors(true) +CRealTextParser::CRealTextParser() + : m_bIgnoreFont(false) + , m_bIgnoreFontSize(false) + , m_bIgnoreFontColor(false) + , m_bIgnoreFontWeight(false) + , m_bIgnoreFontFace(false) + , m_iMinFontSize(14) + , m_iMaxFontSize(25) + , m_iDefaultSubtitleDurationInMillisecs(4000) + , m_bTryToIgnoreErrors(true) { } -CRealTextParser::~CRealTextParser(void) +CRealTextParser::~CRealTextParser() { } -bool CRealTextParser::ParseRealText(wstring p_szFile) +bool CRealTextParser::ParseRealText(std::wstring p_szFile) { - vector vStartTimecodes; - vector vEndTimecodes; - bool bPrevEndTimeMissing(false); - list listTags; - list listPreviousOpenTags; - - while (p_szFile.length() > 0) - { - if (p_szFile.at(0) == '<') - { - Tag oTag; - if (!ExtractTag(p_szFile, oTag)) - return false; - - if (oTag.m_bComment) - continue; - - if (oTag.m_szName == L"time") - { - int iStartTimecode = GetTimecode(oTag.m_mapAttributes[L"begin"]); - int iEndTimecode = GetTimecode(oTag.m_mapAttributes[L"end"]); - -// FilterReduntantTags(listTags); - wstring szLine = RenderTags(listTags); - - if (bPrevEndTimeMissing) - { - pair pairTimecodes(vStartTimecodes.back(), iStartTimecode); - - // Fix issues where the next time code isn't valid end time code for the previous subtitle - if (pairTimecodes.first >= pairTimecodes.second) - { - pairTimecodes.second = pairTimecodes.first + m_iDefaultSubtitleDurationInMillisecs; - } - - if (szLine.length() > 0) - m_RealText.m_mapLines[pairTimecodes] = szLine; - - bPrevEndTimeMissing = false; - } - else if (vStartTimecodes.size() > 0 && vEndTimecodes.size() > 0) - { - pair pairTimecodes(vStartTimecodes.back(), vEndTimecodes.back()); - - if (szLine.length() > 0) - m_RealText.m_mapLines[pairTimecodes] = szLine; - - } - - vStartTimecodes.push_back(iStartTimecode); - if (iEndTimecode <= 0) - { - bPrevEndTimeMissing = true; - } - else - { - vEndTimecodes.push_back(iEndTimecode); - } - } - else if (oTag.m_szName == L"b" || oTag.m_szName == L"i" || oTag.m_szName == L"font") - { - if (oTag.m_bOpen) - listPreviousOpenTags.push_back(oTag); - - if (oTag.m_bClose) - PopTag(listPreviousOpenTags, oTag.m_szName); - - listTags.push_back(oTag); - } - else if (oTag.m_szName == L"clear") - { - listTags.clear(); - - // set existing tags - listTags.insert(listTags.end(), listPreviousOpenTags.begin(), listPreviousOpenTags.end()); - } - else if (oTag.m_szName == L"window") - { - if (oTag.m_bOpen) - m_RealText.m_WindowTag = oTag; - - // Ignore close - } - else if (oTag.m_szName == L"center") - { - m_RealText.m_bCenter = true; - } - else if (oTag.m_szName == L"required") - { - // Ignore - } - else if (oTag.m_szName == L"") - { - // Ignore - } - else - { - // assume formating tag (handled later) - listTags.push_back(oTag); - } - } - else - { - Tag oTextTag; - if (!ExtractTextTag(p_szFile, oTextTag)) - return false; - - listTags.push_back(oTextTag); - } - } - - // Handle final line -// FilterReduntantTags(listTags); - wstring szLine = RenderTags(listTags); - - if (bPrevEndTimeMissing) - { - pair pairTimecodes(vStartTimecodes.back(), vStartTimecodes.back() + m_iDefaultSubtitleDurationInMillisecs); - - if (szLine.length() > 0) - m_RealText.m_mapLines[pairTimecodes] = szLine; - - bPrevEndTimeMissing = false; - } - else if (vStartTimecodes.size() > 0 && vEndTimecodes.size() > 0) - { - pair pairTimecodes(vStartTimecodes.back(), vEndTimecodes.back()); - - if (szLine.length() > 0) - m_RealText.m_mapLines[pairTimecodes] = szLine; - - } - - return true; + std::vector vStartTimecodes; + std::vector vEndTimecodes; + bool bPrevEndTimeMissing = false; + std::list listTags; + std::list listPreviousOpenTags; + + while (!p_szFile.empty()) { + if (p_szFile.at(0) == '<') { + Tag oTag; + if (!ExtractTag(p_szFile, oTag)) { + return false; + } + + if (oTag.m_bComment) { + continue; + } + + if (oTag.m_szName == L"time") { + int iStartTimecode = GetTimecode(oTag.m_mapAttributes[L"begin"]); + int iEndTimecode = GetTimecode(oTag.m_mapAttributes[L"end"]); + + //FilterReduntantTags(listTags); + std::wstring szLine = RenderTags(listTags); + + if (bPrevEndTimeMissing) { + std::pair pairTimecodes(vStartTimecodes.back(), iStartTimecode); + + // Fix issues where the next time code isn't valid end time code for the previous subtitle + if (pairTimecodes.first >= pairTimecodes.second) { + pairTimecodes.second = pairTimecodes.first + m_iDefaultSubtitleDurationInMillisecs; + } + + if (!szLine.empty()) { + m_RealText.m_mapLines[pairTimecodes] = szLine; + } + + bPrevEndTimeMissing = false; + } else if (!vStartTimecodes.empty() && !vEndTimecodes.empty()) { + std::pair pairTimecodes(vStartTimecodes.back(), vEndTimecodes.back()); + + if (!szLine.empty()) { + m_RealText.m_mapLines[pairTimecodes] = szLine; + } + + } else if (vStartTimecodes.empty() && vEndTimecodes.empty() && m_RealText.m_mapLines.empty() && iStartTimecode > 0) { + // Handle first line + if (!szLine.empty()) { + m_RealText.m_mapLines[std::make_pair(0, iStartTimecode)] = szLine; + } + } + + vStartTimecodes.push_back(iStartTimecode); + if (iEndTimecode <= 0) { + bPrevEndTimeMissing = true; + } else { + vEndTimecodes.push_back(iEndTimecode); + } + } else if (oTag.m_szName == L"b" || oTag.m_szName == L"i" || oTag.m_szName == L"font") { + if (oTag.m_bOpen) { + listPreviousOpenTags.push_back(oTag); + } + + if (oTag.m_bClose) { + PopTag(listPreviousOpenTags, oTag.m_szName); + } + + listTags.push_back(oTag); + } else if (oTag.m_szName == L"clear") { + listTags.clear(); + + // set existing tags + listTags.insert(listTags.end(), listPreviousOpenTags.begin(), listPreviousOpenTags.end()); + } else if (oTag.m_szName == L"window") { + if (oTag.m_bOpen) { + m_RealText.m_WindowTag = oTag; + } + + // Ignore close + } else if (oTag.m_szName == L"center") { + m_RealText.m_bCenter = true; + } else if (oTag.m_szName == L"required") { + // Ignore + } else if (oTag.m_szName.empty()) { + // Ignore + } else { + // assume formating tag (handled later) + listTags.push_back(oTag); + } + } else { + Tag oTextTag; + if (!ExtractTextTag(p_szFile, oTextTag)) { + return false; + } + + listTags.push_back(oTextTag); + } + } + + // Handle final line + //FilterReduntantTags(listTags); + std::wstring szLine = RenderTags(listTags); + + if (bPrevEndTimeMissing) { + std::pair pairTimecodes(vStartTimecodes.back(), vStartTimecodes.back() + m_iDefaultSubtitleDurationInMillisecs); + + if (!szLine.empty()) { + m_RealText.m_mapLines[pairTimecodes] = szLine; + } + + } else if (!vStartTimecodes.empty() && !vEndTimecodes.empty()) { + std::pair pairTimecodes(vStartTimecodes.back(), vEndTimecodes.back()); + + if (!szLine.empty()) { + m_RealText.m_mapLines[pairTimecodes] = szLine; + } + + } + + return true; } const CRealTextParser::Subtitles& CRealTextParser::GetParsedSubtitles() { - return m_RealText; + return m_RealText; } -bool CRealTextParser::ExtractTag(wstring& p_rszLine, Tag& p_rTag) +bool CRealTextParser::ExtractTag(std::wstring& p_rszLine, Tag& p_rTag) { - if (p_rszLine.length() < 2 || p_rszLine.at(0) != '<') - { - if (m_bTryToIgnoreErrors) - { - size_t iTempPos = p_rszLine.find_first_of('<'); - - if (iTempPos != wstring::npos) - { - p_rszLine = p_rszLine.substr(iTempPos); - - if (p_rszLine.length() < 2) - return false; - } - - } - else - { - return false; - } - } - - unsigned int iPos = 1; - - // skip comments - if (p_rszLine.at(iPos) == '!') - { - p_rTag.m_bComment = true; - - wstring szComment; - GetString(p_rszLine, iPos, szComment, L">"); - p_rTag.m_szName = szComment; - - ++iPos; // Skip > - p_rszLine = p_rszLine.substr(iPos); - return true; - } - else - { - p_rTag.m_bComment = false; - } - - if (!SkipSpaces(p_rszLine, iPos)) - return false; - - if (p_rszLine.at(iPos) == '/') - { - p_rTag.m_bOpen = false; - p_rTag.m_bClose = true; - ++iPos; - } - else - { - p_rTag.m_bOpen = true; - p_rTag.m_bClose = false; - } - - if (!GetString(p_rszLine, iPos, p_rTag.m_szName, L"\r\n\t />")) - return false; - - p_rTag.m_szName = StringToLower(p_rTag.m_szName); - - if (!GetAttributes(p_rszLine, iPos, p_rTag.m_mapAttributes)) - return false; - - if (p_rszLine.at(iPos) == '/') - { - ++iPos; - p_rTag.m_bClose = true; - } - - if (p_rszLine.at(iPos) == '>') - { - ++iPos; - p_rszLine = p_rszLine.substr(iPos); - return true; - } - else - { - if (m_bTryToIgnoreErrors) - { - size_t iTempPos = p_rszLine.find_first_of('>'); - - if (iTempPos != wstring::npos) - { - if (iTempPos - 1 >= p_rszLine.length()) - return false; - - p_rszLine = p_rszLine.substr(iTempPos + 1); - return true; - } - else - { - return false; - } - - } - else - { - return false; - } - } + if (p_rszLine.length() < 2 || p_rszLine.at(0) != '<') { + if (m_bTryToIgnoreErrors) { + size_t iTempPos = p_rszLine.find_first_of('<'); + + if (iTempPos != std::wstring::npos) { + p_rszLine = p_rszLine.substr(iTempPos); + + if (p_rszLine.length() < 2) { + return false; + } + } + + } else { + return false; + } + } + + unsigned int iPos = 1; + + // skip comments + if (p_rszLine.at(iPos) == '!') { + p_rTag.m_bComment = true; + + std::wstring szComment; + GetString(p_rszLine, iPos, szComment, L">"); + p_rTag.m_szName = szComment; + + ++iPos; // Skip > + if (iPos < p_rszLine.length()) { + p_rszLine = p_rszLine.substr(iPos); + return true; + } else { + return false; + } + } else { + p_rTag.m_bComment = false; + } + + if (!SkipSpaces(p_rszLine, iPos)) { + return false; + } + + if (p_rszLine.at(iPos) == '/') { + p_rTag.m_bOpen = false; + p_rTag.m_bClose = true; + ++iPos; + } else { + p_rTag.m_bOpen = true; + p_rTag.m_bClose = false; + } + + if (!GetString(p_rszLine, iPos, p_rTag.m_szName, L"\r\n\t />")) { + return false; + } + + p_rTag.m_szName = StringToLower(p_rTag.m_szName); + + if (!GetAttributes(p_rszLine, iPos, p_rTag.m_mapAttributes)) { + return false; + } + + if (p_rszLine.at(iPos) == '/') { + ++iPos; + p_rTag.m_bClose = true; + if (iPos >= p_rszLine.length()) { + return false; + } + } + + if (p_rszLine.at(iPos) == '>') { + ++iPos; + p_rszLine = p_rszLine.substr(iPos); + return true; + } else { + if (m_bTryToIgnoreErrors) { + size_t iTempPos = p_rszLine.find_first_of('>'); + + if (iTempPos != std::wstring::npos) { + if (iTempPos - 1 >= p_rszLine.length()) { + return false; + } + + p_rszLine = p_rszLine.substr(iTempPos + 1); + return true; + } else { + return false; + } + + } else { + return false; + } + } } -bool CRealTextParser::ExtractTextTag(wstring& p_rszLine, Tag& p_rTag) +bool CRealTextParser::ExtractTextTag(std::wstring& p_rszLine, Tag& p_rTag) { - p_rTag.m_bText = true; - return ExtractString(p_rszLine, p_rTag.m_szName); + p_rTag.m_bText = true; + return ExtractString(p_rszLine, p_rTag.m_szName); } -bool CRealTextParser::ExtractString(wstring& p_rszLine, wstring& p_rszString) +bool CRealTextParser::ExtractString(std::wstring& p_rszLine, std::wstring& p_rszString) { - if (p_rszLine.length() == 0 || p_rszLine.at(0) == '<') - { - if (m_bTryToIgnoreErrors) - { - p_rszString = L""; - return true; - } - else - { - return false; - } - } - - unsigned int iPos = 0; - - if (!SkipSpaces(p_rszLine, iPos)) - return false; - - if (!GetString(p_rszLine, iPos, p_rszString, L"<")) - return false; - - p_rszLine = p_rszLine.substr(iPos); - return true; + if (p_rszLine.empty() || p_rszLine.at(0) == '<') { + if (m_bTryToIgnoreErrors) { + p_rszString = L""; + return true; + } else { + return false; + } + } + + unsigned int iPos = 0; + + if (!SkipSpaces(p_rszLine, iPos)) { + return false; + } + + if (!GetString(p_rszLine, iPos, p_rszString, L"<")) { + return false; + } + + p_rszLine = p_rszLine.substr(iPos); + return true; } -bool CRealTextParser::SkipSpaces(wstring& p_rszLine, unsigned int& p_riPos) +bool CRealTextParser::SkipSpaces(std::wstring& p_rszLine, unsigned int& p_riPos) { - while (p_rszLine.length() > p_riPos && iswspace(p_rszLine.at(p_riPos))) - { - ++p_riPos; - } + while (p_rszLine.length() > p_riPos && iswspace(p_rszLine.at(p_riPos))) { + ++p_riPos; + } - return p_rszLine.length() > p_riPos; + return p_rszLine.length() > p_riPos; } -bool CRealTextParser::GetString(wstring& p_rszLine, unsigned int& p_riPos, wstring& p_rszString, const wstring& p_crszEndChars) +bool CRealTextParser::GetString(std::wstring& p_rszLine, unsigned int& p_riPos, std::wstring& p_rszString, const std::wstring& p_crszEndChars) { - while (p_rszLine.length() > p_riPos && p_crszEndChars.find(p_rszLine.at(p_riPos)) == wstring::npos) - { - p_rszString += p_rszLine.at(p_riPos); - ++p_riPos; - } + while (p_rszLine.length() > p_riPos && p_crszEndChars.find(p_rszLine.at(p_riPos)) == std::wstring::npos) { + p_rszString += p_rszLine.at(p_riPos); + ++p_riPos; + } - return p_rszLine.length() > p_riPos; + return p_rszLine.length() > p_riPos; } -bool CRealTextParser::GetAttributes(wstring& p_rszLine, unsigned int& p_riPos, map& p_rmapAttributes) +bool CRealTextParser::GetAttributes(std::wstring& p_rszLine, unsigned int& p_riPos, std::map& p_rmapAttributes) { - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - - while (p_riPos>p_rszLine.length() && p_rszLine.at(p_riPos) != '/' && p_rszLine.at(p_riPos) != '>') - { - wstring szName; - if (!GetString(p_rszLine, p_riPos, szName, L"\r\n\t =")) - return false; - - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - - if (p_rszLine.at(p_riPos) != '=') - { - if (m_bTryToIgnoreErrors) - { - p_riPos = p_rszLine.find_first_of('=', p_riPos); - if (p_riPos == wstring::npos) - return false; - } - else - { - return false; - } - } - - ++p_riPos; - - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - - bool bUsesQuotes(false); - if (p_rszLine.at(p_riPos) == '\'' || p_rszLine.at(p_riPos) == '\"') - { - ++p_riPos; - bUsesQuotes = true; - } - - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - - wstring szValue; - if (bUsesQuotes) - { - if (!GetString(p_rszLine, p_riPos, szValue, L"\"\'/>")) - return false; - } - else - { - if (!GetString(p_rszLine, p_riPos, szValue, L" \t/>")) - return false; - } - - p_rmapAttributes[StringToLower(szName)] = szValue; - - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - - if (p_rszLine.at(p_riPos) == '\'' || p_rszLine.at(p_riPos) == '\"') - ++p_riPos; - - if (!SkipSpaces(p_rszLine, p_riPos)) - return false; - } - - return p_rszLine.length() > p_riPos; + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + + while (p_riPos < p_rszLine.length() && p_rszLine.at(p_riPos) != '/' && p_rszLine.at(p_riPos) != '>') { + std::wstring szName; + if (!GetString(p_rszLine, p_riPos, szName, L"\r\n\t =")) { + return false; + } + + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + + if (p_rszLine.at(p_riPos) != '=') { + if (m_bTryToIgnoreErrors) { + p_riPos = (unsigned int)p_rszLine.find_first_of('=', p_riPos); + if (p_riPos == std::wstring::npos) { + return false; + } + } else { + return false; + } + } + + ++p_riPos; + + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + + bool bUsesQuotes = false; + if (p_rszLine.at(p_riPos) == '\'' || p_rszLine.at(p_riPos) == '\"') { + ++p_riPos; + bUsesQuotes = true; + } + + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + + std::wstring szValue; + if (bUsesQuotes) { + if (!GetString(p_rszLine, p_riPos, szValue, L"\"\'/>")) { + return false; + } + } else { + if (!GetString(p_rszLine, p_riPos, szValue, L" \t/>")) { + return false; + } + } + + p_rmapAttributes[StringToLower(szName)] = szValue; + + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + + if (p_rszLine.at(p_riPos) == '\'' || p_rszLine.at(p_riPos) == '\"') { + ++p_riPos; + } + + if (!SkipSpaces(p_rszLine, p_riPos)) { + return false; + } + } + + return p_rszLine.length() > p_riPos; } -int CRealTextParser::GetTimecode(const wstring& p_crszTimecode) +int CRealTextParser::GetTimecode(const std::wstring& p_crszTimecode) { - int iTimecode(0); - int iMultiplier(1); - - // Exception: if the timecode doesn't contain any separators, assume the time code is in seconds (and change multiplier to reflect that) - if (p_crszTimecode.find_first_of('.') == wstring::npos && p_crszTimecode.find_first_of(':') == wstring::npos) - iMultiplier = 1000; - - wstring szCurrentPart; - - for (int i = p_crszTimecode.length() - 1; i >= 0; --i) - { - if (p_crszTimecode.at(i) == '.' || p_crszTimecode.at(i) == ':') - { - if (iMultiplier == 1) - { - while (szCurrentPart.length() < 3) - szCurrentPart += L"0"; - } - - iTimecode += iMultiplier * ::_wtoi(szCurrentPart.c_str()); - - if (iMultiplier == 1) - { - iMultiplier = 1000; - } - else - { - iMultiplier *= 60; - } - - szCurrentPart = L""; - } - else - { - szCurrentPart = p_crszTimecode.substr(i, 1) + szCurrentPart; - } - } - - iTimecode += iMultiplier * ::_wtoi(szCurrentPart.c_str()); - - return iTimecode; + int iTimecode = 0; + int iMultiplier = 1; + + // Exception: if the timecode doesn't contain any separators, assume the time code is in seconds (and change multiplier to reflect that) + if (p_crszTimecode.find_first_of('.') == std::wstring::npos && p_crszTimecode.find_first_of(':') == std::wstring::npos) { + iMultiplier = 1000; + } + + std::wstring szCurrentPart; + + for (ptrdiff_t i = p_crszTimecode.length() - 1; i >= 0; --i) { + if (p_crszTimecode.at(i) == '.' || p_crszTimecode.at(i) == ':') { + if (iMultiplier == 1) { + while (szCurrentPart.length() < 3) { + szCurrentPart += L"0"; + } + } + + iTimecode += iMultiplier * ::_wtoi(szCurrentPart.c_str()); + + if (iMultiplier == 1) { + iMultiplier = 1000; + } else { + iMultiplier *= 60; + } + + szCurrentPart = L""; + } else { + szCurrentPart = p_crszTimecode.substr(i, 1) + szCurrentPart; + } + } + + iTimecode += iMultiplier * ::_wtoi(szCurrentPart.c_str()); + + return iTimecode; } -wstring CRealTextParser::FormatTimecode(int iTimecode, - int iMillisecondPrecision/* = 3*/, - bool p_bPadZeroes/* = true*/, - const wstring& p_crszSeparator/* = ":"*/, - const wstring& p_crszMillisecondSeparator/* = "."*/) +std::wstring CRealTextParser::FormatTimecode(int iTimecode, + int iMillisecondPrecision/* = 3*/, + bool p_bPadZeroes/* = true*/, + const std::wstring& p_crszSeparator/* = ":"*/, + const std::wstring& p_crszMillisecondSeparator/* = "."*/) { - wostringstream ossTimecode; + std::wostringstream ossTimecode; - int iHours = iTimecode / 1000 / 60 / 60; + int iHours = iTimecode / 1000 / 60 / 60; - ossTimecode << iHours; + ossTimecode << iHours; - int iMinutes = (iTimecode / 1000 / 60) % 60; + int iMinutes = (iTimecode / 1000 / 60) % 60; - ossTimecode << p_crszSeparator; - ossTimecode << iMinutes; + ossTimecode << p_crszSeparator; + ossTimecode << iMinutes; - int iSeconds = (iTimecode / 1000) % 60; + int iSeconds = (iTimecode / 1000) % 60; - ossTimecode << p_crszSeparator; - ossTimecode << iSeconds; + ossTimecode << p_crszSeparator; + ossTimecode << iSeconds; - int iMilliSeconds = iTimecode % 1000; + int iMilliSeconds = iTimecode % 1000; - if (iMillisecondPrecision < 3) - iMilliSeconds /= 10 * (3 - iMillisecondPrecision); + if (iMillisecondPrecision < 3) { + iMilliSeconds /= 10 * (3 - iMillisecondPrecision); + } - ossTimecode << p_crszMillisecondSeparator; - ossTimecode << iMilliSeconds; + ossTimecode << p_crszMillisecondSeparator; + ossTimecode << iMilliSeconds; - return ossTimecode.str(); + return ossTimecode.str(); } -wstring CRealTextParser::StringToLower(const wstring& p_crszString) +std::wstring CRealTextParser::StringToLower(const std::wstring& p_crszString) { - wstring szLowercaseString; - for(unsigned int i=0; i < p_crszString.length(); ++i) - { - szLowercaseString += towlower(p_crszString.at(i)); - } - return szLowercaseString; + std::wstring szLowercaseString; + for (unsigned int i = 0; i < p_crszString.length(); ++i) { + szLowercaseString += towlower(p_crszString.at(i)); + } + return szLowercaseString; } -wstring CRealTextParser::RenderTags(const list& p_crlTags) +std::wstring CRealTextParser::RenderTags(const std::list& p_crlTags) { - bool bEmpty(true); - wstring szString; - - for (list::const_iterator iter = p_crlTags.begin(); iter != p_crlTags.end(); ++iter) - { - Tag oTag(*iter); - - if (oTag.m_szName == L"br") - { - szString += L"\n"; - } - else if (oTag.m_szName == L"b") - { - if (!m_bIgnoreFontWeight) - { - if (oTag.m_bOpen) - { - szString += L""; - } - else if (oTag.m_bClose) - { - szString += L""; - } - } - } - else if (oTag.m_szName == L"i") - { - if (!m_bIgnoreFontWeight) - { - if (oTag.m_bOpen) - { - szString += L""; - } - else if (oTag.m_bClose) - { - szString += L""; - } - } - } - else if (oTag.m_szName == L"font") - { - if (!m_bIgnoreFont) - { - if (oTag.m_bOpen) - { - szString += L":: iterator i = oTag.m_mapAttributes.begin(); i != oTag.m_mapAttributes.end(); ++i) - { - if (m_bIgnoreFontSize && i->first == L"size") - continue; - - if (m_bIgnoreFontColor && i->first == L"color") - continue; - - if (m_bIgnoreFontFace && i->first == L"face") - continue; - - if (i->first == L"size" && i->second.length() > 0 && ::iswdigit(i->second.at(0))) - { - int iSize = ::_wtoi(i->second.c_str()); - - if (iSize > 0 && iSize < m_iMinFontSize) - continue; - - if (iSize > m_iMaxFontSize) - continue; - } - - szString += L" "; - szString += i->first; - szString += L"=\""; - szString += i->second; - szString += L"\""; - } - szString += L">"; - } - - if (oTag.m_bClose) - { - szString += L""; - } - } - } - else if (oTag.m_bText) - { - szString += oTag.m_szName; - - if (!oTag.m_szName.empty()) - bEmpty = false; - } - else - { -// AfxMessageBox(CString(_T("Unknown RealText-tag: ")) + oTag.m_szName.c_str()); - } - } - - if (bEmpty) - return L""; - else - return szString; + bool bEmpty = true; + std::wstring szString; + + for (auto iter = p_crlTags.cbegin(); iter != p_crlTags.cend(); ++iter) { + Tag oTag(*iter); + + if (oTag.m_szName == L"br") { + szString += L"\n"; + } else if (oTag.m_szName == L"b") { + if (!m_bIgnoreFontWeight) { + if (oTag.m_bOpen) { + szString += L""; + } else if (oTag.m_bClose) { + szString += L""; + } + } + } else if (oTag.m_szName == L"i") { + if (!m_bIgnoreFontWeight) { + if (oTag.m_bOpen) { + szString += L""; + } else if (oTag.m_bClose) { + szString += L""; + } + } + } else if (oTag.m_szName == L"font") { + if (!m_bIgnoreFont) { + if (oTag.m_bOpen) { + szString += L":: iterator i = oTag.m_mapAttributes.begin(); i != oTag.m_mapAttributes.end(); ++i) { + if (m_bIgnoreFontSize && i->first == L"size") { + continue; + } + + if (m_bIgnoreFontColor && i->first == L"color") { + continue; + } + + if (m_bIgnoreFontFace && i->first == L"face") { + continue; + } + + if (i->first == L"size" && !i->second.empty() && ::iswdigit(i->second.at(0))) { + int iSize = ::_wtoi(i->second.c_str()); + + if (iSize > 0 && iSize < m_iMinFontSize) { + continue; + } + + if (iSize > m_iMaxFontSize) { + continue; + } + } + + szString += L" "; + szString += i->first; + szString += L"=\""; + szString += i->second; + szString += L"\""; + } + szString += L">"; + } + + if (oTag.m_bClose) { + szString += L""; + } + } + } else if (oTag.m_bText) { + szString += oTag.m_szName; + + if (!oTag.m_szName.empty()) { + bEmpty = false; + } + } else { + //AfxMessageBox(CString(_T("Unknown RealText-tag: ")) + oTag.m_szName.c_str()); + } + } + + if (bEmpty) { + return L""; + } else { + return szString; + } } -bool CRealTextParser::OutputSRT(wostream& p_rOutput) +bool CRealTextParser::OutputSRT(std::wostream& p_rOutput) { - int iCounter(1); - for (map, wstring>::const_iterator i = m_RealText.m_mapLines.begin(); - i != m_RealText.m_mapLines.end(); - ++i) - { - p_rOutput << iCounter++; - p_rOutput << endl; - - p_rOutput << FormatTimecode(i->first.first); - p_rOutput << L" --> "; - p_rOutput << FormatTimecode(i->first.second); - p_rOutput << endl; - - p_rOutput << i->second; - p_rOutput << endl; - p_rOutput << endl; - } - - return true; + int iCounter = 1; + for (auto i = m_RealText.m_mapLines.cbegin(); i != m_RealText.m_mapLines.cend(); ++i) { + p_rOutput << iCounter++; + p_rOutput << std::endl; + + p_rOutput << FormatTimecode(i->first.first); + p_rOutput << L" --> "; + p_rOutput << FormatTimecode(i->first.second); + p_rOutput << std::endl; + + p_rOutput << i->second; + p_rOutput << std::endl; + p_rOutput << std::endl; + } + + return true; } -void CRealTextParser::PopTag(list& p_rlistTags, const wstring& p_crszTagName) +void CRealTextParser::PopTag(std::list& p_rlistTags, const std::wstring& p_crszTagName) { - for (list::reverse_iterator riter = p_rlistTags.rbegin(); riter != p_rlistTags.rend(); ++riter) - { - if (riter->m_szName == p_crszTagName) - { - p_rlistTags.erase((++riter).base()); - return; - } - } + for (auto riter = p_rlistTags.crbegin(); riter != p_rlistTags.crend(); ++riter) { + if (riter->m_szName == p_crszTagName) { + p_rlistTags.erase((++riter).base()); + return; + } + } } -void CRealTextParser::FilterReduntantTags(list& p_rlistTags) +/*void CRealTextParser::FilterReduntantTags(std::list& p_rlistTags) { - list::iterator iterPrev; - for (list::iterator iterCurrent = p_rlistTags.begin(); iterCurrent != p_rlistTags.end(); ++iterCurrent) - { - if (iterCurrent != p_rlistTags.begin()) - { - if (iterPrev->m_szName == L"font" && iterCurrent->m_szName == L"font" && - iterPrev->m_bOpen && iterCurrent->m_bOpen) - { - p_rlistTags.erase(iterPrev); - } - } - iterPrev = iterCurrent; - } -} + std::list::iterator iterPrev; + for (std::list::iterator iterCurrent = p_rlistTags.begin(); iterCurrent != p_rlistTags.end(); ++iterCurrent) { + if (iterCurrent != p_rlistTags.begin()) { + if (iterPrev->m_szName == L"font" && iterCurrent->m_szName == L"font" && + iterPrev->m_bOpen && iterCurrent->m_bOpen) { + p_rlistTags.erase(iterPrev); + } + } + iterPrev = iterCurrent; + } +}*/ diff --git a/src/subtitles/RealTextParser.h b/src/subtitles/RealTextParser.h index 06247e430..9c30c16c1 100644 --- a/src/subtitles/RealTextParser.h +++ b/src/subtitles/RealTextParser.h @@ -1,102 +1,106 @@ +/* + * (C) 2008-2013 see Authors.txt + * + * This file is part of MPC-HC. + * + * MPC-HC is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * MPC-HC is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ + #pragma once #include -using std::wostream; -using std::wostringstream; -using std::endl; - #include -using std::wstring; - #include -using std::map; -using std::pair; - #include -using std::vector; - #include -using std::list; - #include -using std::towlower; class CRealTextParser { public: - CRealTextParser(); - virtual ~CRealTextParser(void); + CRealTextParser(); + virtual ~CRealTextParser(); - struct Tag - { - Tag(): m_bOpen(false), m_bClose(false), m_bComment(false), m_bText(false) {} + struct Tag { + Tag(): m_bOpen(false), m_bClose(false), m_bComment(false), m_bText(false) {} - wstring m_szName; + std::wstring m_szName; - bool m_bOpen; - bool m_bClose; + bool m_bOpen; + bool m_bClose; - bool m_bComment; - bool m_bText; + bool m_bComment; + bool m_bText; - map m_mapAttributes; - }; + std::map m_mapAttributes; + }; - struct Subtitles - { - Subtitles(): m_WindowTag(), m_FontTag(), m_bCenter(false) {} + struct Subtitles { + Subtitles(): m_WindowTag(), m_FontTag(), m_bCenter(false) {} - Tag m_WindowTag; - Tag m_FontTag; + Tag m_WindowTag; + Tag m_FontTag; - bool m_bCenter; + bool m_bCenter; - map, wstring> m_mapLines; - }; + std::map, std::wstring> m_mapLines; + }; - bool ParseRealText(wstring p_szFile); + bool ParseRealText(std::wstring p_szFile); - const Subtitles& GetParsedSubtitles(); + const Subtitles& GetParsedSubtitles(); - bool OutputSRT(wostream& p_rOutput); + bool OutputSRT(std::wostream& p_rOutput); private: - bool ExtractTag(wstring& p_rszLine, Tag& p_rTag); - bool ExtractTextTag(wstring& p_rszLine, Tag& p_rTag); - bool ExtractString(wstring& p_rszLine, wstring& p_rszString); - bool SkipSpaces(wstring& p_rszLine, unsigned int& p_riPos); - bool GetString(wstring& p_rszLine, unsigned int& p_riPos, wstring& p_rszString, const wstring& p_crszEndChars); - bool GetAttributes(wstring& p_rszLine, unsigned int& p_riPos, map& p_rmapAttributes); + bool ExtractTag(std::wstring& p_rszLine, Tag& p_rTag); + bool ExtractTextTag(std::wstring& p_rszLine, Tag& p_rTag); + bool ExtractString(std::wstring& p_rszLine, std::wstring& p_rszString); + bool SkipSpaces(std::wstring& p_rszLine, unsigned int& p_riPos); + bool GetString(std::wstring& p_rszLine, unsigned int& p_riPos, std::wstring& p_rszString, const std::wstring& p_crszEndChars); + bool GetAttributes(std::wstring& p_rszLine, unsigned int& p_riPos, std::map& p_rmapAttributes); - int GetTimecode(const wstring& p_crszTimecode); - wstring FormatTimecode(int iTimecode, - int iMillisecondPrecision = 3, - bool p_bPadZeroes = true, - const wstring& p_crszSeparator = L":", - const wstring& p_crszMillisecondSeparator = L"."); + int GetTimecode(const std::wstring& p_crszTimecode); + std::wstring FormatTimecode(int iTimecode, + int iMillisecondPrecision = 3, + bool p_bPadZeroes = true, + const std::wstring& p_crszSeparator = L":", + const std::wstring& p_crszMillisecondSeparator = L"."); - wstring StringToLower(const wstring& p_crszString); + std::wstring StringToLower(const std::wstring& p_crszString); - wstring RenderTags(const list& p_crlTags); + std::wstring RenderTags(const std::list& p_crlTags); - void PopTag(list& p_rlistTags, const wstring& p_crszTagName); + void PopTag(std::list& p_rlistTags, const std::wstring& p_crszTagName); - // Filter out for example multiple font tags opened previously (font tags are not always terminated properly in realtext and can build up) - void FilterReduntantTags(list& p_rlistTags); + // Filter out for example multiple font tags opened previously + // (font tags are not always terminated properly in realtext and can build up) + //void FilterReduntantTags(list& p_rlistTags); - - Subtitles m_RealText; + Subtitles m_RealText; - bool m_bIgnoreFont; - bool m_bIgnoreFontSize; - bool m_bIgnoreFontColor; - bool m_bIgnoreFontWeight; - bool m_bIgnoreFontFace; + bool m_bIgnoreFont; + bool m_bIgnoreFontSize; + bool m_bIgnoreFontColor; + bool m_bIgnoreFontWeight; + bool m_bIgnoreFontFace; - int m_iMinFontSize; - int m_iMaxFontSize; + int m_iMinFontSize; + int m_iMaxFontSize; - int m_iDefaultSubtitleDurationInMillisecs; + int m_iDefaultSubtitleDurationInMillisecs; - bool m_bTryToIgnoreErrors; + bool m_bTryToIgnoreErrors; }; diff --git a/src/subtitles/STS.cpp b/src/subtitles/STS.cpp index aacc38b19..447182853 100644 --- a/src/subtitles/STS.cpp +++ b/src/subtitles/STS.cpp @@ -579,7 +579,7 @@ static bool OpenSubRipper(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet } } - return(!ret.IsEmpty()); + return !ret.IsEmpty(); } static bool OpenOldSubRipper(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet) @@ -597,7 +597,7 @@ static bool OpenOldSubRipper(CTextFile* file, CSimpleTextSubtitle& ret, int Char } int hh1, mm1, ss1, hh2, mm2, ss2; - int c = swscanf(buff, L"{%d:%d:%d}{%d:%d:%d}", &hh1, &mm1, &ss1, &hh2, &mm2, &ss2); + int c = swscanf_s(buff, L"{%d:%d:%d}{%d:%d:%d}", &hh1, &mm1, &ss1, &hh2, &mm2, &ss2); if(c == 6) { @@ -676,13 +676,13 @@ static bool OpenSubViewer(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet WCHAR sep; int hh1, mm1, ss1, hs1, hh2, mm2, ss2, hs2; - int c = swscanf(buff, L"%d:%d:%d%c%d,%d:%d:%d%c%d\n", - &hh1, &mm1, &ss1, &sep, &hs1, &hh2, &mm2, &ss2, &sep, &hs2); + int c = swscanf_s(buff, L"%d:%d:%d%c%d,%d:%d:%d%c%d\n", + &hh1, &mm1, &ss1, &sep, 1, &hs1, &hh2, &mm2, &ss2, &sep, 1, &hs2); if(c == 10) { CStringW str; - file->ReadString(str); + VERIFY(file->ReadString(str)); str.Replace(L"[br]", L"\\N"); @@ -730,7 +730,7 @@ static STSStyle* GetMicroDVDStyle(CString str, int CharSet) if(!_tcsnicmp(code, _T("{c:$"), 4)) { - _stscanf(code, _T("{c:$%x"), &ret->colors[0]); + _stscanf_s(code, _T("{c:$%x"), &ret->colors[0]); } else if(!_tcsnicmp(code, _T("{f:"), 3)) { @@ -739,12 +739,12 @@ static STSStyle* GetMicroDVDStyle(CString str, int CharSet) else if(!_tcsnicmp(code, _T("{s:"), 3)) { float f; - if(1 == _stscanf(code, _T("{s:%f"), &f)) + if(1 == _stscanf_s(code, _T("{s:%f"), &f)) ret->fontSize = f; } else if(!_tcsnicmp(code, _T("{h:"), 3)) { - _stscanf(code, _T("{h:%d"), &ret->charSet); + _stscanf_s(code, _T("{h:%d"), &ret->charSet); } else if(!_tcsnicmp(code, _T("{y:"), 3)) { @@ -757,7 +757,7 @@ static STSStyle* GetMicroDVDStyle(CString str, int CharSet) else if(!_tcsnicmp(code, _T("{p:"), 3)) { int p; - _stscanf(code, _T("{p:%d"), &p); + _stscanf_s(code, _T("{p:%d"), &p); ret->scrAlignment = (p == 0) ? 8 : 2; } @@ -803,7 +803,7 @@ static CStringW MicroDVD2SSA(CStringW str, bool fUnicode, int CharSet) code.MakeLower(); int color; - swscanf(code, L"{c:$%x", &color); + swscanf_s(code, L"{c:$%x", &color); code.Format(L"{\\c&H%x&}", color); ret += code; } @@ -819,8 +819,8 @@ static CStringW MicroDVD2SSA(CStringW str, bool fUnicode, int CharSet) fRestore[FONTSIZE] = (iswupper(code[1]) == 0); code.MakeLower(); - float size; - swscanf(code, L"{s:%f", &size); + double size; + swscanf_s(code, L"{s:%lf", &size); code.Format(L"{\\fs%f}", size); ret += code; } @@ -829,9 +829,9 @@ static CStringW MicroDVD2SSA(CStringW str, bool fUnicode, int CharSet) fRestore[COLOR] = (_istupper(code[1]) == 0); code.MakeLower(); - int CharSet; - swscanf(code, L"{h:%d", &CharSet); - code.Format(L"{\\fe%d}", CharSet); + int iCharSet; + swscanf_s(code, L"{h:%d", &iCharSet); + code.Format(L"{\\fe%d}", iCharSet); ret += code; } else if(!_wcsnicmp(code, L"{y:", 3)) @@ -853,7 +853,7 @@ static CStringW MicroDVD2SSA(CStringW str, bool fUnicode, int CharSet) int x, y; TCHAR c; - swscanf_s(code, L"{o:%d%c%d", &x, &c, 1, &y); + swscanf_s(code, L"{o:%d%c%d", &x, &c, 1, &y); code.Format(L"{\\move(%d,%d,0,0,0,0)}", x, y); ret += code; } @@ -884,7 +884,7 @@ static CStringW MicroDVD2SSA(CStringW str, bool fUnicode, int CharSet) } } - memset(fRestore, 0, sizeof(bool)*fRestoreLen); + ZeroMemory(fRestore, sizeof(bool)*fRestoreLen); ret += L"\\N"; } @@ -905,9 +905,9 @@ static bool OpenMicroDVD(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet) if(buff.IsEmpty()) continue; int start, end; - int c = swscanf(buff, L"{%d}{%d}", &start, &end); + int c = swscanf_s(buff, L"{%d}{%d}", &start, &end); - if(c != 2) {c = swscanf(buff, L"{%d}{}", &start)+1; end = start + 60; fCheck = true;} + if(c != 2) {c = swscanf_s(buff, L"{%d}{}", &start)+1; end = start + 60; fCheck = true;} if(c != 2) { @@ -1192,7 +1192,7 @@ static bool OpenVPlayer(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet) } int hh, mm, ss; - int c = swscanf(buff, L"%d:%d:%d:", &hh, &mm, &ss); + int c = swscanf_s(buff, L"%d:%d:%d:", &hh, &mm, &ss); if(c == 3) { @@ -1323,7 +1323,7 @@ static bool LoadFont(CString& font) const TCHAR* s = font; const TCHAR* e = s + len; - for(BYTE* p = pData; s < e; s++, p++) *p = *s - 33; + for(BYTE* p = pData; s < e; s++, p++) *p = BYTE(*s - 33); for(int i = 0, j = 0, k = len&~3; i < k; i+=4, j+=3) { @@ -1368,7 +1368,7 @@ static bool LoadFont(CString& font) chksum += ((DWORD*)(BYTE*)pData)[i]; CString fn; - fn.Format(_T("%sfont%08x.ttf"), path, chksum); + fn.Format(_T("%sfont%08lx.ttf"), path, chksum); CFileStatus fs; if(!CFileGetStatus(fn, fs)) @@ -1866,38 +1866,39 @@ static bool OpenMPL2(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet) static bool OpenRealText(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet) { - wstring szFile; - + std::wstring szFile; CStringW buff; - while(file->ReadString(buff)) - { + + while (file->ReadString(buff)) { FastTrim(buff); - if(buff.IsEmpty()) continue; + if (buff.IsEmpty()) { + continue; + } + + // Make sure that the subtitle file starts with a tag + if (szFile.empty() && buff.CompareNoCase(_T("second.c_str(), CharSet), + SubRipper2SSA(i->second.c_str(), CharSet), file->IsUnicode(), - line->first.first, - line->first.second); + i->first.first, + i->first.second); } - // std::wofstream wofsOut(L"c:/zzz.srt"); - // RealTextParser.OutputSRT(wofsOut); - - return(!ret.IsEmpty()); + return !ret.IsEmpty(); } typedef bool (*STSOpenFunct)(CTextFile* file, CSimpleTextSubtitle& ret, int CharSet); @@ -2227,7 +2228,7 @@ void CSimpleTextSubtitle::AddStyle(CString name, STSStyle* style) CString name2 = name_str; - if(i < len && _stscanf(name_str.Right(len-i), _T("%d"), &idx) == 1) + if(i < len && _stscanf_s(name_str.Right(len-i), _T("%d"), &idx) == 1) { name2 = name_str.Left(i); }