From b0c79943fe4123d2f28c1bc923875a9032ba00d6 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sun, 5 May 2024 20:51:50 +0300 Subject: [PATCH 01/20] Solve empty headers parsing for eml files --- AppCUI | 2 +- GViewCore/include/GView.hpp | 7 +- GViewCore/src/Unpack/Base64.cpp | 21 ++- .../src/View/LexicalViewer/TextParser.cpp | 8 +- GenericPlugins/Unpackers/src/Unpackers.cpp | 106 +-------------- Types/CPP/src/CPPFile.cpp | 4 +- Types/EML/include/eml.hpp | 11 +- Types/EML/src/EMLFile.cpp | 124 ++++++++---------- Types/EML/src/PanelInformation.cpp | 9 +- Types/INI/src/INIFile.cpp | 18 +-- Types/JS/src/JSFile.cpp | 4 +- 11 files changed, 107 insertions(+), 207 deletions(-) diff --git a/AppCUI b/AppCUI index 5713d4eb..94868e6d 160000 --- a/AppCUI +++ b/AppCUI @@ -1 +1 @@ -Subproject commit 5713d4eb4c3b09c7520807698f80bb528f985fba +Subproject commit 94868e6d92c1695a6cd0ddc1244dc5f03cad231d diff --git a/GViewCore/include/GView.hpp b/GViewCore/include/GView.hpp index 259042b1..cc4ffa0c 100644 --- a/GViewCore/include/GView.hpp +++ b/GViewCore/include/GView.hpp @@ -1017,15 +1017,15 @@ namespace View return u16string_view{ text + start, (size_t) (end - start) }; return u16string_view(); } - uint32 ParseUntillEndOfLine(uint32 index) const; - uint32 ParseUntillStartOfNextLine(uint32 index) const; + uint32 ParseUntilEndOfLine(uint32 index) const; + uint32 ParseUntilStartOfNextLine(uint32 index) const; uint32 Parse(uint32 index, bool (*validate)(char16 character)) const; uint32 ParseBackwards(uint32 index, bool (*validate)(char16 character)) const; uint32 ParseSameGroupID(uint32 index, uint32 (*charToID)(char16 character)) const; uint32 ParseSpace(uint32 index, SpaceType type = SpaceType::SpaceAndTabs) const; uint32 ParseString(uint32 index, StringFormat format = StringFormat::All) const; uint32 ParseNumber(uint32 index, NumberFormat format = NumberFormat::All) const; - uint32 ParseUntillText(uint32 index, string_view textToFind, bool ignoreCase) const; + uint32 ParseUntilText(uint32 index, string_view textToFind, bool ignoreCase) const; uint32 ParseUntilNextCharacterAfterText(uint32 index, string_view textToFind, bool ignoreCase) const; uint64 ComputeHash64(uint32 start, uint32 end, bool ignoreCase) const; uint32 ComputeHash32(uint32 start, uint32 end, bool ignoreCase) const; @@ -1440,6 +1440,7 @@ namespace App namespace Unpack::Base64 { CORE_EXPORT void Encode(BufferView view, Buffer& output); + CORE_EXPORT bool Decode(BufferView view, Buffer& output, bool& hasWarning, String& warningMessage); CORE_EXPORT bool Decode(BufferView view, Buffer& output); } }; // namespace GView diff --git a/GViewCore/src/Unpack/Base64.cpp b/GViewCore/src/Unpack/Base64.cpp index 949c104d..77d86632 100644 --- a/GViewCore/src/Unpack/Base64.cpp +++ b/GViewCore/src/Unpack/Base64.cpp @@ -44,14 +44,16 @@ void Encode(BufferView view, Buffer& output) output.AddMultipleTimes(string_view("=", 1), (3 - sequenceIndex) % 3); } -bool Decode(BufferView view, Buffer& output) +bool Decode(BufferView view, Buffer& output, bool& hasWarning, String& warningMessage) { uint32 sequence = 0; uint32 sequenceIndex = 0; char lastEncoded = 0; + hasWarning = false; + + output.Reserve((view.GetLength() / 4) * 3); - for (uint32 i = 0; i < view.GetLength(); ++i) - { + for (uint32 i = 0; i < view.GetLength(); ++i) { char encoded = view[i]; CHECK(encoded < sizeof(BASE64_DECODE_TABLE) / sizeof(*BASE64_DECODE_TABLE), false, ""); @@ -60,7 +62,8 @@ bool Decode(BufferView view, Buffer& output) } if (lastEncoded == '=' && sequenceIndex == 0) { - AppCUI::Dialogs::MessageBox::ShowError("Warning!", "Ignoring extra bytes after the end of buffer"); + hasWarning = true; + warningMessage = "Ignoring extra bytes after the end of buffer"; break; } @@ -79,6 +82,7 @@ bool Decode(BufferView view, Buffer& output) if (sequenceIndex % 4 == 0) { char* buffer = (char*) &sequence; + output.Add(string_view(buffer + 3, 1)); output.Add(string_view(buffer + 2, 1)); output.Add(string_view(buffer + 1, 1)); @@ -92,4 +96,13 @@ bool Decode(BufferView view, Buffer& output) return true; } + +bool Decode(BufferView view, Buffer& output) +{ + bool tempHasWarning; + String tempWarningMessage; + + return Decode(view, output, tempHasWarning, tempWarningMessage); } + +} // namespace GView::Unpack::Base64 diff --git a/GViewCore/src/View/LexicalViewer/TextParser.cpp b/GViewCore/src/View/LexicalViewer/TextParser.cpp index 2193b254..c3749918 100644 --- a/GViewCore/src/View/LexicalViewer/TextParser.cpp +++ b/GViewCore/src/View/LexicalViewer/TextParser.cpp @@ -102,7 +102,7 @@ TextParser::TextParser(u16string_view _text) if (this->text == nullptr) this->size = 0; // sanity check } -uint32 TextParser::ParseUntillEndOfLine(uint32 index) const +uint32 TextParser::ParseUntilEndOfLine(uint32 index) const { if (index >= size) return size; @@ -114,7 +114,7 @@ uint32 TextParser::ParseUntillEndOfLine(uint32 index) const } return index; } -uint32 TextParser::ParseUntillStartOfNextLine(uint32 index) const +uint32 TextParser::ParseUntilStartOfNextLine(uint32 index) const { if (index >= size) return size; @@ -225,7 +225,7 @@ uint32 TextParser::ParseSpace(uint32 index, SpaceType type) const } return index; } -uint32 TextParser::ParseUntillText(uint32 index, string_view textToFind, bool ignoreCase) const +uint32 TextParser::ParseUntilText(uint32 index, string_view textToFind, bool ignoreCase) const { if (index >= size) return size; @@ -281,7 +281,7 @@ uint32 TextParser::ParseUntillText(uint32 index, string_view textToFind, bool ig } uint32 TextParser::ParseUntilNextCharacterAfterText(uint32 index, string_view textToFind, bool ignoreCase) const { - auto pos = ParseUntillText(index, textToFind, ignoreCase); + auto pos = ParseUntilText(index, textToFind, ignoreCase); if (pos >= size) return size; return pos + (uint32) textToFind.size(); diff --git a/GenericPlugins/Unpackers/src/Unpackers.cpp b/GenericPlugins/Unpackers/src/Unpackers.cpp index c688aa97..42a67669 100644 --- a/GenericPlugins/Unpackers/src/Unpackers.cpp +++ b/GenericPlugins/Unpackers/src/Unpackers.cpp @@ -21,17 +21,6 @@ using namespace AppCUI::Graphics; using namespace GView::View; -constexpr char BASE64_ENCODE_TABLE[] = { 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', - 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' }; - -constexpr char BASE64_DECODE_TABLE[] = { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1, -1, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51 }; - - Plugin::Plugin() : Window("Unpackers", "d:c,w:140,h:40", WindowFlags::FixedPosition) { sync = Factory::CheckBox::Create(this, "&Unpackers", "x:2%,y:1,w:30"); @@ -76,7 +65,7 @@ void Plugin::Update() } list->DeleteAllItems(); - auto item = list->AddItem({ "Cosmin", "ViewName", "CevaFormat", "Base64" }); + auto item = list->AddItem({ "Ceva", "ViewName", "CevaFormat", "Base64" }); //auto desktop = AppCUI::Application::GetDesktop(); //const auto windowsNo = desktop->GetChildrenCount(); @@ -125,99 +114,6 @@ void Plugin::Update() //} } - -void Plugin::Base64Encode(BufferView view, Buffer& output) -{ - uint32 sequence = 0; - uint32 sequenceIndex = 0; - - // TODO: same as before, pass something that doesn't need extra preprocessing - for (uint32 i = 0; i < view.GetLength(); i += 2) - { - char decoded = view[i]; - - sequence |= decoded << ((3 - sequenceIndex) * 8); - sequenceIndex++; - - if (sequenceIndex % 3 == 0) - { - // get 4 encoded components out of this one - // 0x3f -> 0b00111111 - - char buffer[] = { - BASE64_ENCODE_TABLE[(sequence >> 26) & 0x3f], - BASE64_ENCODE_TABLE[(sequence >> 20) & 0x3f], - BASE64_ENCODE_TABLE[(sequence >> 14) & 0x3f], - BASE64_ENCODE_TABLE[(sequence >> 8) & 0x3f], - }; - - output.Add(string_view(buffer, 4)); - - sequence = 0; - sequenceIndex = 0; - } - } - - output.AddMultipleTimes(string_view("=", 1), (3 - sequenceIndex) % 3); -} - - -bool Plugin::Base64Decode(BufferView view, Buffer& output) -{ - uint32 sequence = 0; - uint32 sequenceIndex = 0; - char lastEncoded = 0; - - // TODO: pass something else as a parameter, not needing extra pasing in the function - for (uint32 i = 0; i < view.GetLength(); i += 2) // skip the second byte in the character - { - char encoded = view[i]; - CHECK(encoded < sizeof(BASE64_DECODE_TABLE) / sizeof(*BASE64_DECODE_TABLE), false, ""); - - if (encoded == '\r' || encoded == '\n') - { - continue; - } - - if (lastEncoded == '=' && sequenceIndex == 0) - { - AppCUI::Dialogs::MessageBox::ShowError("Warning!", "Ignoring extra bytes after the end of buffer"); - break; - } - - uint32 decoded; - - if (encoded == '=') - { - // padding - decoded = 0; - } - else - { - decoded = BASE64_DECODE_TABLE[encoded]; - CHECK(decoded != -1, false, ""); - } - - sequence |= decoded << (2 + (4 - sequenceIndex) * 6); - sequenceIndex++; - - if (sequenceIndex % 4 == 0) - { - char* buffer = (char*) &sequence; - output.Add(string_view(buffer + 3, 1)); - output.Add(string_view(buffer + 2, 1)); - output.Add(string_view(buffer + 1, 1)); - - sequence = 0; - sequenceIndex = 0; - } - - lastEncoded = encoded; - } - - return true; -} - // you're passing the callbacks - this needs to be statically allocated // but you should lazy initialize it - so make it a pointer static std::unique_ptr plugin{ nullptr }; diff --git a/Types/CPP/src/CPPFile.cpp b/Types/CPP/src/CPPFile.cpp index 7c1b34fa..5f824763 100644 --- a/Types/CPP/src/CPPFile.cpp +++ b/Types/CPP/src/CPPFile.cpp @@ -652,7 +652,7 @@ uint32 CPPFile::TokenizeOperator(const GView::View::LexicalViewer::TextParser& t } uint32 CPPFile::TokenizePreprocessDirective(const TextParser& text, TokensList& list, BlocksList& blocks, uint32 pos) { - auto eol = text.ParseUntillEndOfLine(pos); + auto eol = text.ParseUntilEndOfLine(pos); auto start = pos; pos = text.ParseSpace(pos + 1, SpaceType::SpaceAndTabs); if ((CharType::GetCharType(text[pos])) != CharType::Word) @@ -742,7 +742,7 @@ void CPPFile::Tokenize(uint32 start, uint32 end, const TextParser& text, TokensL idx = text.ParseSpace(idx, SpaceType::All); break; case CharType::SingleLineComment: - next = text.ParseUntillEndOfLine(idx); + next = text.ParseUntilEndOfLine(idx); tokenList.Add( TokenType::Comment, idx, diff --git a/Types/EML/include/eml.hpp b/Types/EML/include/eml.hpp index aa53df5e..7c5d9cca 100644 --- a/Types/EML/include/eml.hpp +++ b/Types/EML/include/eml.hpp @@ -2,8 +2,7 @@ #include "GView.hpp" -struct EML_Item_Record -{ +struct EML_Item_Record { uint32 parentStartIndex; uint32 startIndex; uint32 dataLength; @@ -38,10 +37,14 @@ namespace Type void ParseHeaders(GView::View::LexicalViewer::TextParser text, uint32& index); uint32 ParseHeaderFieldBody(GView::View::LexicalViewer::TextParser text, uint32 index); std::u16string ExtractContentType(GView::View::LexicalViewer::TextParser text, uint32 start, uint32 end); + void ExtractFieldNameAndBody( + GView::View::LexicalViewer::TextParser text, uint32& start, uint32& end, std::u16string& fieldName, std::u16string& fieldBody); public: EMLFile(); - virtual ~EMLFile() override {} + virtual ~EMLFile() override + { + } virtual std::string_view GetTypeName() override { @@ -51,6 +54,7 @@ namespace Type { // here } + public: Reference selectionZoneInterface; @@ -75,7 +79,6 @@ namespace Type // View::ContainerViewer::OpenItemInterface virtual void OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) override; - }; namespace Panels diff --git a/Types/EML/src/EMLFile.cpp b/Types/EML/src/EMLFile.cpp index 9effa425..6f7a0250 100644 --- a/Types/EML/src/EMLFile.cpp +++ b/Types/EML/src/EMLFile.cpp @@ -8,31 +8,45 @@ EMLFile::EMLFile() { } -std::u16string EMLFile::ExtractContentType(TextParser text, uint32 start, uint32 end) +void EMLFile::ExtractFieldNameAndBody(TextParser text, uint32& start, uint32& end, std::u16string& fieldName, std::u16string& fieldBody) { - start = text.ParseUntillText(start, "content-type", true); + // header-field name + end = text.ParseUntilText(start, ":", false); - if (start >= text.Len()) - { - return u""; - } + fieldName = text.GetSubString(start, end); + + // ltrim + start = end = text.ParseSpace(end + 1, SpaceType::SpaceAndTabs); - // TODO: make a function that extracts both the header and the field body - start = text.ParseUntillText(start, ":", false); - start = text.ParseSpace(start + 1, SpaceType::All); - end = ParseHeaderFieldBody(text, start); + // header-field body + end = ParseHeaderFieldBody(text, start); - std::u16string fieldBody(text.GetSubString(start, end)); + fieldBody = text.GetSubString(start, end); + // remove CRLF size_t pos = 0; while ((pos = fieldBody.find(u"\r\n", pos)) != std::u16string::npos) fieldBody.replace(pos, 2, u""); +} + +std::u16string EMLFile::ExtractContentType(TextParser text, uint32 start, uint32 end) +{ + start = text.ParseUntilText(start, "content-type", true); + + if (start >= text.Len()) { + return u""; + } + + std::u16string fieldName, fieldBody; + ExtractFieldNameAndBody(text, start, end, fieldName, fieldBody); return fieldBody.substr(0, fieldBody.find(u';')); } bool EMLFile::BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) { + auto temp = parent.GetData(); + unicodeString.Add(obj->GetData().GetEntireFile()); TextParser text(unicodeString.ToStringView()); @@ -49,15 +63,12 @@ bool EMLFile::PopulateItem(AppCUI::Controls::TreeViewItem item) EML_Item_Record& itemData = items[itemsIndex]; TextParser text(unicodeString.ToStringView()); - if (itemData.leafNode) - { + if (itemData.leafNode) { item.SetText(0, contentType); - } - else - { + } else { item.SetText(0, ExtractContentType(text, itemData.startIndex, itemData.startIndex + itemData.dataLength)); } - + item.SetText(1, String().Format("%u", itemData.dataLength)); item.SetText(2, String().Format("%u", itemData.startIndex + itemData.parentStartIndex)); @@ -74,19 +85,21 @@ void EMLFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte auto bufferView = obj->GetData().GetEntireFile(); BufferView itemBufferView(bufferView.GetData() + itemData->startIndex, itemData->dataLength); - if (!itemData->leafNode) - { + if (!itemData->leafNode) { GView::App::OpenBuffer(itemBufferView, obj->GetName(), path, GView::App::OpenMethod::ForceType, "eml"); - } - else - { + } else { const auto& encodingHeader = - std::find_if(headerFields.begin(), headerFields.end(), [](const auto& item) { return item.first == u"Content-Transfer-Encoding"; }); + std::find_if(headerFields.begin(), headerFields.end(), [](const auto& item) { return item.first == u"Content-Transfer-Encoding"; }); if (encodingHeader != headerFields.end() && encodingHeader->second == u"base64") { Buffer output; - if (GView::Unpack::Base64::Decode(itemBufferView, output)) { + bool hasWarning; + String warningMessage; + if (GView::Unpack::Base64::Decode(itemBufferView, output, hasWarning, warningMessage)) { + if (hasWarning) { + AppCUI::Dialogs::MessageBox::ShowError("Warning!", warningMessage); + } GView::App::OpenBuffer(output, obj->GetName(), path, GView::App::OpenMethod::BestMatch); } else { AppCUI::Dialogs::MessageBox::ShowError("Error!", "Malformed base64 buffer!"); @@ -100,15 +113,14 @@ void EMLFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte uint32 EMLFile::ParseHeaderFieldBody(TextParser text, uint32 start) { - uint32 end = text.ParseUntillText(start, "\r\n", false); - - while (end + 2 < text.Len()) - { + uint32 end = text.ParseUntilText(start, "\r\n", false); + + while (end + 2 < text.Len()) { auto ch = text[end + 2]; if (ch != ' ' && ch != '\t') break; - end = text.ParseUntillText(end + 2, "\r\n", false); + end = text.ParseUntilText(end + 2, "\r\n", false); } return end; @@ -118,34 +130,17 @@ void EMLFile::ParseHeaders(GView::View::LexicalViewer::TextParser text, uint32& { uint32 start = index, end = index; - while (start < text.Len()) - { + while (start < text.Len()) { if (text.GetSubString(start, start + 2) == u"\r\n") // end of headers { start += 2; // skip CRLF break; } - // header-field name - end = text.ParseUntillText(start, ":", false); - - std::u16string fieldName(text.GetSubString(start, end)); - - // ltrim - start = end = text.ParseSpace(end + 1, SpaceType::All); - - // header-field body - end = ParseHeaderFieldBody(text, start); - - std::u16string fieldBody(text.GetSubString(start, end)); - - // remove CRLF - size_t pos = 0; - while ((pos = fieldBody.find(u"\r\n", pos)) != std::u16string::npos) - fieldBody.replace(pos, 2, u""); - - if (fieldName == u"Content-Type") - { + std::u16string fieldName, fieldBody; + ExtractFieldNameAndBody(text, start, end, fieldName, fieldBody); + + if (fieldName == u"Content-Type") { contentType = fieldBody; } @@ -164,13 +159,12 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star TextParser contentTypeParser(contentType); - uint32 typeEnd = contentTypeParser.ParseUntillText(0, "/", false); + uint32 typeEnd = contentTypeParser.ParseUntilText(0, "/", false); CHECKRET(typeEnd != contentTypeParser.Len(), ""); u16string_view type = contentTypeParser.GetSubString(0, typeEnd); - if (type == u"multipart") - { + if (type == u"multipart") { // get the boundary for the parts std::string boundary; @@ -179,15 +173,12 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star uint32 boundaryEnd; - if (contentTypeParser[boundaryStart] == '"') - { + if (contentTypeParser[boundaryStart] == '"') { // the boundary is enclosed in quotes boundaryStart++; - boundaryEnd = contentTypeParser.ParseUntillText(boundaryStart, "\"", false); - } - else - { - boundaryEnd = contentTypeParser.ParseUntillText(boundaryStart, ";", false); + boundaryEnd = contentTypeParser.ParseUntilText(boundaryStart, "\"", false); + } else { + boundaryEnd = contentTypeParser.ParseUntilText(boundaryStart, ";", false); } boundary = "--"; @@ -198,18 +189,16 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star uint32 partStart = start; uint32 partEnd; - do - { + do { partStart = text.ParseUntilNextCharacterAfterText(partStart, boundary, false); partStart = text.ParseSpace(partStart, SpaceType::All); - if (text.ParseUntillText(partStart, "--", false) == partStart) - { + if (text.ParseUntilText(partStart, "--", false) == partStart) { // end of part break; } - partEnd = text.ParseUntillText(partStart, boundary, false); + partEnd = text.ParseUntilText(partStart, boundary, false); // TODO: get the parent's index items.emplace_back(EML_Item_Record{ .parentStartIndex = 0, .startIndex = partStart, .dataLength = partEnd - partStart, .leafNode = false }); @@ -220,8 +209,7 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star return; } - if (type == u"message") - { + if (type == u"message") { items.emplace_back(EML_Item_Record{ .parentStartIndex = 0, .startIndex = start, .dataLength = end - start, .leafNode = false }); return; } diff --git a/Types/EML/src/PanelInformation.cpp b/Types/EML/src/PanelInformation.cpp index f3003b23..8e4fcabc 100644 --- a/Types/EML/src/PanelInformation.cpp +++ b/Types/EML/src/PanelInformation.cpp @@ -5,7 +5,7 @@ using namespace AppCUI::Controls; Panels::Information::Information(Reference _eml) : TabPage("&Information") { - eml = _eml; + eml = _eml; general = Factory::ListView::Create(this, "x:0,y:0,w:100%,h:10", { "n:Field,w:12", "n:Value,w:100" }, ListViewFlags::None); headers = Factory::ListView::Create(this, "x:0,y:10,w:100%,h:20", { "n:Field,w:12", "n:Value,w:10000" }, ListViewFlags::None); @@ -25,8 +25,7 @@ void Panels::Information::UpdateGeneralInformation() } headers->AddItem("Headers"); - for (const auto& itr : eml->headerFields) - { + for (const auto& itr : eml->headerFields) { headers->AddItem({ itr.first, itr.second }); } } @@ -36,8 +35,8 @@ void Panels::Information::UpdateIssues() } void Panels::Information::RecomputePanelsPositions() { - int w = this->GetWidth(); - int h = this->GetHeight(); + int w = this->GetWidth(); + int h = this->GetHeight(); if (!general.IsValid()) return; diff --git a/Types/INI/src/INIFile.cpp b/Types/INI/src/INIFile.cpp index aa5dec19..d5182105 100644 --- a/Types/INI/src/INIFile.cpp +++ b/Types/INI/src/INIFile.cpp @@ -143,7 +143,7 @@ struct ParserData switch (chType) { case CharType::Comment: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add( TokenType::Comment, pos, @@ -187,7 +187,7 @@ struct ParserData state = ParserState::ExpectEqual; break; default: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add(TokenType::Invalid, pos, next, TokenColor::Word) .SetError("Invalid character (expecting either a key or a section)"); pos = next; @@ -200,7 +200,7 @@ struct ParserData switch (chType) { case CharType::Comment: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add( TokenType::Comment, pos, @@ -220,7 +220,7 @@ struct ParserData state = ParserState::ExpectKeyValueOrSection; break; case CharType::Invalid: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add(TokenType::Invalid, pos, next, TokenColor::Word) .SetError("Invalid character (expecting either a avlue or an array)"); pos = next; @@ -256,7 +256,7 @@ struct ParserData switch (chType) { case CharType::Comment: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add( TokenType::Comment, pos, @@ -281,7 +281,7 @@ struct ParserData state = ParserState::ExpectValueOrArray; break; default: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add(TokenType::Invalid, pos, next, TokenColor::Word).SetError("Invalid character (expecting either ':' or '=')"); pos = next; state = ParserState::ExpectKeyValueOrSection; @@ -294,7 +294,7 @@ struct ParserData switch (chType) { case CharType::Comment: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add( TokenType::Comment, pos, @@ -348,7 +348,7 @@ struct ParserData switch (chType) { case CharType::Comment: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add( TokenType::Comment, pos, @@ -367,7 +367,7 @@ struct ParserData state = ParserState::ExpectCommaOrEndOfArray; break; case CharType::Invalid: - next = text.ParseUntillEndOfLine(pos); + next = text.ParseUntilEndOfLine(pos); tokenList.Add(TokenType::Invalid, pos, next, TokenColor::Word) .SetError("Invalid character (expecting either a avlue or an array)"); pos = next; diff --git a/Types/JS/src/JSFile.cpp b/Types/JS/src/JSFile.cpp index c102e0ae..602ee2f8 100644 --- a/Types/JS/src/JSFile.cpp +++ b/Types/JS/src/JSFile.cpp @@ -717,7 +717,7 @@ uint32 JSFile::TokenizeList(const TextParser& text, TokensList& tokenList, uint3 } uint32 JSFile::TokenizePreprocessDirective(const TextParser& text, TokensList& list, BlocksList& blocks, uint32 pos) { - auto eol = text.ParseUntillEndOfLine(pos); + auto eol = text.ParseUntilEndOfLine(pos); auto start = pos; pos = text.ParseSpace(pos + 1, SpaceType::SpaceAndTabs); if ((CharType::GetCharType(text[pos])) != CharType::Word) @@ -827,7 +827,7 @@ void JSFile::Tokenize(uint32 start, uint32 end, const TextParser& text, TokensLi idx = text.ParseSpace(idx, SpaceType::SpaceAndTabs); break; case CharType::SingleLineComment: - next = text.ParseUntillEndOfLine(idx); + next = text.ParseUntilEndOfLine(idx); tokenList.Add( TokenType::Comment, idx, From 6e3b8af62693c48446541a12cb40ff3a11bd4c6d Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sun, 5 May 2024 21:42:34 +0300 Subject: [PATCH 02/20] Add more info to ListItem elements --- Types/EML/include/eml.hpp | 4 ++-- Types/EML/src/EMLFile.cpp | 23 +++++++++++++---------- Types/EML/src/eml.cpp | 3 ++- 3 files changed, 17 insertions(+), 13 deletions(-) diff --git a/Types/EML/include/eml.hpp b/Types/EML/include/eml.hpp index 7c5d9cca..f3d1e280 100644 --- a/Types/EML/include/eml.hpp +++ b/Types/EML/include/eml.hpp @@ -3,9 +3,9 @@ #include "GView.hpp" struct EML_Item_Record { - uint32 parentStartIndex; - uint32 startIndex; + uint32 startOffset; uint32 dataLength; + uint32 partIndex; bool leafNode; }; diff --git a/Types/EML/src/EMLFile.cpp b/Types/EML/src/EMLFile.cpp index 6f7a0250..d7ec14ba 100644 --- a/Types/EML/src/EMLFile.cpp +++ b/Types/EML/src/EMLFile.cpp @@ -63,14 +63,19 @@ bool EMLFile::PopulateItem(AppCUI::Controls::TreeViewItem item) EML_Item_Record& itemData = items[itemsIndex]; TextParser text(unicodeString.ToStringView()); + item.SetText(0, String().Format("%u", itemData.partIndex)); + if (itemData.leafNode) { - item.SetText(0, contentType); + TextParser contentTypeParser(contentType); + uint32 typeEnd = contentTypeParser.ParseUntilText(0, ";", false); + u16string_view type = contentTypeParser.GetSubString(0, typeEnd); + item.SetText(1, type); } else { - item.SetText(0, ExtractContentType(text, itemData.startIndex, itemData.startIndex + itemData.dataLength)); + item.SetText(1, ExtractContentType(text, itemData.startOffset, itemData.startOffset + itemData.dataLength)); } - item.SetText(1, String().Format("%u", itemData.dataLength)); - item.SetText(2, String().Format("%u", itemData.startIndex + itemData.parentStartIndex)); + item.SetText(2, String().Format("%u", itemData.dataLength)); + item.SetText(3, String().Format("%u", itemData.startOffset)); item.SetData(&itemData); @@ -83,7 +88,7 @@ void EMLFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte auto itemData = item.GetData(); auto bufferView = obj->GetData().GetEntireFile(); - BufferView itemBufferView(bufferView.GetData() + itemData->startIndex, itemData->dataLength); + BufferView itemBufferView(bufferView.GetData() + itemData->startOffset, itemData->dataLength); if (!itemData->leafNode) { GView::App::OpenBuffer(itemBufferView, obj->GetName(), path, GView::App::OpenMethod::ForceType, "eml"); @@ -200,8 +205,7 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star partEnd = text.ParseUntilText(partStart, boundary, false); - // TODO: get the parent's index - items.emplace_back(EML_Item_Record{ .parentStartIndex = 0, .startIndex = partStart, .dataLength = partEnd - partStart, .leafNode = false }); + items.emplace_back(EML_Item_Record{.startOffset = partStart, .dataLength = partEnd - partStart, .partIndex = (uint32) items.size(), .leafNode = false }); partStart = partEnd; } while (partEnd < end); @@ -210,14 +214,13 @@ void EMLFile::ParsePart(GView::View::LexicalViewer::TextParser text, uint32 star } if (type == u"message") { - items.emplace_back(EML_Item_Record{ .parentStartIndex = 0, .startIndex = start, .dataLength = end - start, .leafNode = false }); + items.emplace_back(EML_Item_Record{.startOffset = start, .dataLength = end - start, .partIndex = (uint32) items.size(), .leafNode = false }); return; } // base case // simple type (text|application|...) - items.emplace_back(EML_Item_Record{ .parentStartIndex = 0, .startIndex = start, .dataLength = end - start, .leafNode = true }); - + items.emplace_back(EML_Item_Record{ .startOffset = start, .dataLength = end - start, .partIndex = (uint32) items.size(), .leafNode = true }); return; } } // namespace GView::Type::EML diff --git a/Types/EML/src/eml.cpp b/Types/EML/src/eml.cpp index 13baa12e..aee3a663 100644 --- a/Types/EML/src/eml.cpp +++ b/Types/EML/src/eml.cpp @@ -15,9 +15,10 @@ void CreateContainerView(Reference win, Reference< //settings.SetIcon(ISO_ICON); settings.SetColumns({ + "n:&Index,a:r,w:50", "n:&Content-Type,a:r,w:50", "n:&Size,a:r,w:20", - "n:&OffsetInFile,a:r,w:20", + "n:&Offset,a:r,w:20", }); settings.SetEnumerateCallback(win->GetObject()->GetContentType().ToObjectRef()); From 7b68b0a0dad5d2949ca984cda26dc5d4970c8c0a Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Fri, 10 May 2024 21:09:06 +0300 Subject: [PATCH 03/20] Avoid getting data --- Types/EML/src/EMLFile.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Types/EML/src/EMLFile.cpp b/Types/EML/src/EMLFile.cpp index d7ec14ba..a07f339e 100644 --- a/Types/EML/src/EMLFile.cpp +++ b/Types/EML/src/EMLFile.cpp @@ -45,7 +45,7 @@ std::u16string EMLFile::ExtractContentType(TextParser text, uint32 start, uint32 bool EMLFile::BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) { - auto temp = parent.GetData(); + //auto temp = parent.GetData(); unicodeString.Add(obj->GetData().GetEntireFile()); TextParser text(unicodeString.ToStringView()); From 0dc58fd95a6ba90c6dc7d08be5694f11bc8fd02a Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sat, 1 Jun 2024 16:30:26 +0300 Subject: [PATCH 04/20] Last sync --- CMakeLists.txt | 1 + Types/EML/src/EMLFile.cpp | 73 ++++++++++++++++++++++++++++++++------- Types/EML/src/eml.cpp | 3 -- 3 files changed, 61 insertions(+), 16 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 94475953..f03d9809 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,6 +198,7 @@ if(NOT DEFINED CMAKE_TESTING_ENABLED) add_subdirectory(Types/SQLite) add_subdirectory(Types/JCLASS) add_subdirectory(Types/EML) + add_subdirectory(Types/DOC) # Generic plugins supported by GView add_subdirectory(GenericPlugins/CharacterTable) diff --git a/Types/EML/src/EMLFile.cpp b/Types/EML/src/EMLFile.cpp index ec6a62a2..9d777d50 100644 --- a/Types/EML/src/EMLFile.cpp +++ b/Types/EML/src/EMLFile.cpp @@ -8,6 +8,46 @@ EMLFile::EMLFile() { } +bool EMLFile::ProcessData() +{ + unicodeString.Set(obj->GetData().GetEntireFile()); + TextParser text(unicodeString.ToStringView()); + + itemsIndex = 0; + items.clear(); + + ParsePart(text, 0, text.Len()); + if (items.empty()) + return false; + + uint32 modeNr = 1, attachmentsNr = 1; + for (auto& itemData : items) { + itemData.contentType = itemData.leafNode ? contentType : ExtractContentType(text, itemData.startOffset, itemData.startOffset + itemData.dataLength); + itemData.identifier = GetIdentifierFromContentType(itemData.contentType); + if (!itemData.identifier.empty()) + continue; + if (itemData.contentType.starts_with(u"multipart")) { + itemData.identifier = u"body message"; + } else if (itemData.contentType.starts_with(u"application")) { + LocalString<32> attachment = {}; + attachment.SetFormat("attachment %u", attachmentsNr); + LocalUnicodeStringBuilder<32> sb = {}; + sb.Set(attachment.GetText()); + itemData.identifier = sb.ToStringView(); + attachmentsNr++; + } else { + LocalString<32> mode = {}; + mode.SetFormat("mode %u", modeNr); + LocalUnicodeStringBuilder<32> sb = {}; + sb.Set(mode.GetText()); + itemData.identifier = sb.ToStringView(); + modeNr++; + } + } + + return true; +} + void EMLFile::ExtractFieldNameAndBody(TextParser text, uint32& start, uint32& end, std::u16string& fieldName, std::u16string& fieldBody) { // header-field name @@ -45,11 +85,6 @@ std::u16string EMLFile::ExtractContentType(TextParser text, uint32 start, uint32 bool EMLFile::BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) { - //auto temp = parent.GetData(); - - unicodeString.Add(obj->GetData().GetEntireFile()); - TextParser text(unicodeString.ToStringView()); - itemsIndex = 0; return true; } @@ -84,7 +119,7 @@ void EMLFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte auto itemData = item.GetData(); TextParser text(unicodeString.ToStringView()); - auto currentContentType = ExtractContentType(text, itemData->startIndex, itemData->startIndex + itemData->dataLength); + auto currentContentType = ExtractContentType(text, itemData->startOffset, itemData->startOffset + itemData->dataLength); auto bufferView = obj->GetData().GetEntireFile(); BufferView itemBufferView(bufferView.GetData() + itemData->startOffset, itemData->dataLength); @@ -101,14 +136,26 @@ void EMLFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte if (encodingHeader != headerFields.end()) { Buffer output; - - bool hasWarning; - String warningMessage; - if (GView::Unpack::Base64::Decode(itemBufferView, output, hasWarning, warningMessage)) { - if (hasWarning) { - AppCUI::Dialogs::MessageBox::ShowError("Warning!", warningMessage); + + if (encodingHeader->second == u"base64") { + bool hasWarning; + String warningMessage; + + if (GView::Unpack::Base64::Decode(itemBufferView, output, hasWarning, warningMessage)) { + if (hasWarning) { + AppCUI::Dialogs::MessageBox::ShowError("Warning!", warningMessage); + } + + GView::App::OpenBuffer(output, bufferName, path, GView::App::OpenMethod::BestMatch); + } else { + AppCUI::Dialogs::MessageBox::ShowError("Error!", "Malformed base64 buffer!"); + } + } else if (encodingHeader->second == u"quoted-printable") { + if (GView::Unpack::QuotedPrintable::Decode(itemBufferView, output)) { + GView::App::OpenBuffer(output, bufferName, path, GView::App::OpenMethod::BestMatch); + } else { + AppCUI::Dialogs::MessageBox::ShowError("Error!", "Malformed quoted-printable buffer!"); } - GView::App::OpenBuffer(output, obj->GetName(), path, GView::App::OpenMethod::BestMatch); } else { GView::App::OpenBuffer(itemBufferView, bufferName, path, GView::App::OpenMethod::BestMatch); } diff --git a/Types/EML/src/eml.cpp b/Types/EML/src/eml.cpp index b64ab9a5..2d6f8ecb 100644 --- a/Types/EML/src/eml.cpp +++ b/Types/EML/src/eml.cpp @@ -46,9 +46,6 @@ void CreateContainerView(Reference win, Reference< const auto& headers = eml->GetHeaders(); for (const auto& [name, value] : headers) { - if (name == u"Cc") // TODO: to be removed when issues https://github.com/gdt050579/GView/issues/301 is fixed - continue; - std::string nameStr = toUTF8(name); settings.AddProperty(nameStr, value); } From ccdca64d902786496c00c160f0b725d6b4d91df4 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sat, 1 Jun 2024 16:31:31 +0300 Subject: [PATCH 05/20] Add primitive parsing for dir and Module streams --- Types/DOC/CMakeLists.txt | 2 + Types/DOC/include/doc.hpp | 122 ++++++ Types/DOC/src/ByteStream.cpp | 25 ++ Types/DOC/src/CMakeLists.txt | 4 + Types/DOC/src/DOCFile.cpp | 580 +++++++++++++++++++++++++++++ Types/DOC/src/PanelInformation.cpp | 46 +++ Types/DOC/src/doc.cpp | 81 ++++ 7 files changed, 860 insertions(+) create mode 100644 Types/DOC/CMakeLists.txt create mode 100644 Types/DOC/include/doc.hpp create mode 100644 Types/DOC/src/ByteStream.cpp create mode 100644 Types/DOC/src/CMakeLists.txt create mode 100644 Types/DOC/src/DOCFile.cpp create mode 100644 Types/DOC/src/PanelInformation.cpp create mode 100644 Types/DOC/src/doc.cpp diff --git a/Types/DOC/CMakeLists.txt b/Types/DOC/CMakeLists.txt new file mode 100644 index 00000000..8da56abd --- /dev/null +++ b/Types/DOC/CMakeLists.txt @@ -0,0 +1,2 @@ +include(type) +create_type(DOC) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp new file mode 100644 index 00000000..fb7cceaf --- /dev/null +++ b/Types/DOC/include/doc.hpp @@ -0,0 +1,122 @@ +#pragma once + +#include "GView.hpp" + +namespace GView +{ +namespace Type +{ + namespace DOC + { + namespace Panels + { + class Information; + } + + class ByteStream + { + private: + void* ptr; + size_t size; + size_t cursor; + + public: + ByteStream(void* ptr, size_t size) : ptr(ptr), size(size), cursor(0) {}; + + BufferView Read(size_t count); + template T ReadAs() { + size_t count = sizeof(T); + if (cursor + count > size) { + count = size - cursor; + } + T value = *(T*) ((uint8*) ptr + cursor); + cursor += count; + return value; + } + + void Seek(size_t count); + + size_t GetCursor() { + return cursor; + }; + + size_t GetSize() + { + return size; + } + }; + + class DOCFile : public TypeInterface, public View::ContainerViewer::EnumerateInterface, public View::ContainerViewer::OpenItemInterface + { + private: + friend class Panels::Information; + + public: + DOCFile(); + virtual ~DOCFile() override + { + } + + virtual std::string_view GetTypeName() override + { + return "DOC"; + } + virtual void RunCommand(std::string_view command) override + { + // here + } + + public: + + bool ProcessData(); + Reference selectionZoneInterface; + + uint32 GetSelectionZonesCount() override + { + CHECK(selectionZoneInterface.IsValid(), 0, ""); + return selectionZoneInterface->GetSelectionZonesCount(); + } + + TypeInterface::SelectionZone GetSelectionZone(uint32 index) override + { + static auto d = TypeInterface::SelectionZone{ 0, 0 }; + CHECK(selectionZoneInterface.IsValid(), d, ""); + CHECK(index < selectionZoneInterface->GetSelectionZonesCount(), d, ""); + + return selectionZoneInterface->GetSelectionZone(index); + } + + // View::ContainerViewer::EnumerateInterface + virtual bool BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) override; + virtual bool PopulateItem(AppCUI::Controls::TreeViewItem item) override; + + // View::ContainerViewer::OpenItemInterface + virtual void OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) override; + }; + + namespace Panels + { + class Information : public AppCUI::Controls::TabPage + { + Reference doc; + Reference general; + Reference headers; + + void UpdateGeneralInformation(); + void UpdateIssues(); + void RecomputePanelsPositions(); + + public: + Information(Reference doc); + + void Update(); + virtual void OnAfterResize(int newWidth, int newHeight) override + { + RecomputePanelsPositions(); + } + }; + }; // namespace Panels + + } // namespace DOC +} // namespace Type +} // namespace GView diff --git a/Types/DOC/src/ByteStream.cpp b/Types/DOC/src/ByteStream.cpp new file mode 100644 index 00000000..f6deb1bc --- /dev/null +++ b/Types/DOC/src/ByteStream.cpp @@ -0,0 +1,25 @@ +#include "doc.hpp" + + +using namespace GView::Type::DOC; + + +BufferView ByteStream::Read(size_t count) +{ + if (cursor + count > size) { + count = size - cursor; + } + + BufferView view((uint8*)ptr + cursor, count); + cursor += count; + + return view; +} + +void ByteStream::Seek(size_t count) +{ + if (cursor + count > size) { + count = size - cursor; + } + cursor += count; +} diff --git a/Types/DOC/src/CMakeLists.txt b/Types/DOC/src/CMakeLists.txt new file mode 100644 index 00000000..99544f80 --- /dev/null +++ b/Types/DOC/src/CMakeLists.txt @@ -0,0 +1,4 @@ +target_sources(DOC PRIVATE + doc.cpp + DOCFile.cpp + PanelInformation.cpp) \ No newline at end of file diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp new file mode 100644 index 00000000..7c9fdc79 --- /dev/null +++ b/Types/DOC/src/DOCFile.cpp @@ -0,0 +1,580 @@ +#include "doc.hpp" + +#include // TODO: remove + +namespace GView::Type::DOC +{ +using namespace GView::View::LexicalViewer; + +#define ENDOFCHAIN 0xfffffffe +#define FREESECT 0xffffffff +#define FATSECT 0xfffffffd +#define DIFSECT 0xfffffffc + + +DOCFile::DOCFile() +{ + +} + +bool DecompressStream(BufferView bv, Buffer& decompressed) +{ + // TODO: document the compression algorithm and expose it into the core + + CHECK(bv[0] == 0x01, false, ""); // signature byte + + size_t index = 1; + + while (index < bv.GetLength()) { + // loop over chunks + + uint16 header = bv[index] + (bv[index + 1] << 8); + index += 2; + + uint16 chunkLength = header & 0x0fff; // + 3, for total size + bool isCompressed = header & 0x8000; // most significant bit + + CHECK((header & 0x7000) >> 12 == 0b011, false, ""); // fixed value + + if (!isCompressed) { + // TODO: verify + CHECK(index + 4096 < bv.GetLength(), false, ""); + decompressed.Add(BufferView(bv.GetData() + index, 4096)); + index += 4096; + continue; + } + + // Token Sequence series + while (index < chunkLength + 3) { + unsigned char flags = bv[index++]; + for (int i = 0; i < 8; ++i) { + if (index > chunkLength + 3) { + break; + } + + if (flags & 0x01) { + // 2 bytes (Copy Token) + + int offsetBits = ceil(log2(decompressed.GetLength())); // number of bits used for the offset value + + if (offsetBits < 4) { + offsetBits = 4; + } else if (offsetBits > 12) { + offsetBits = 12; + } + + uint16 token = bv[index] + (bv[index + 1] << 8); + uint16 offsetMask = 0xffff << (16 - offsetBits); + + int offset = ((token & offsetMask) >> (16 - offsetBits)) + 1; // negative offset from the current decompressed position + int length = (token & ~offsetMask) + 3; // the stored value is 3 less than the actual value + + // tail copy bytes may be written to the decompressed buffer while starting to copy the chunk + size_t startOffset = decompressed.GetLength() - offset; + for (size_t cursor = startOffset; cursor < startOffset + length; ++cursor) { + unsigned char byte = decompressed[cursor]; + decompressed.Add(BufferView(&byte, 1)); + } + + index += 2; + } else { + // 1 byte (Literal token) + + unsigned char byte = bv[index]; + decompressed.Add(BufferView(&byte, 1)); + index++; + } + + flags >>= 1; + } + } + } + + BufferView view(decompressed.GetData(), decompressed.GetLength()); + + GView::App::OpenBuffer(decompressed, "decompressed", "", GView::App::OpenMethod::BestMatch, "bin"); + + return true; +} + +enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; + + +struct REFERENCECONTROL_Record { + uint32 recordIndex; + String libidTwiddled; + String nameRecordExtended; + String libidExtended; + BufferView originalTypeLib; + uint32 cookie; +}; + +struct REFERENCEORIGINAL_Record { + uint32 recordIndex; + String libidOriginal; + REFERENCECONTROL_Record referenceControl; +}; + +struct REFERENCEREGISTERED_Record { + uint32 recordIndex; + String libid; +}; + +struct REFERENCEPROJECT_Record { + uint32 recordIndex; + String libidAbsolute; + String libidRelative; + uint32 majorVersion; + uint16 minorVersion; +}; + + +struct MODULE_Record { + String moduleName; + String streamName; + String docString; + uint32 textOffset; + uint32 helpContext; +}; + + +bool ParseUncompressedDirStream(BufferView bv) +{ + ByteStream stream((void*) bv.GetData(), bv.GetLength()); + uint16 check; + + // PROJECTINFORMATION + CHECK(stream.ReadAs() == 0x01, false, "projectsyskind_id"); + CHECK(stream.ReadAs() == 0x04, false, "projectsyskind_size"); + + SysKind sysKind = (SysKind) stream.ReadAs(); + + CHECK(stream.ReadAs() == 0x02, false, "projectlcid_id"); + CHECK(stream.ReadAs() == 0x04, false, "projectlcid_size"); + CHECK(stream.ReadAs() == 0x0409, false, "projectlcid_lcid"); + + CHECK(stream.ReadAs() == 0x14, false, "projectlcidinvoke_id"); + CHECK(stream.ReadAs() == 0x04, false, "projectlcidinvoke_size"); + CHECK(stream.ReadAs() == 0x0409, false, "lcidinvoke"); + + CHECK(stream.ReadAs() == 0x03, false, "projectcodepage_id"); + CHECK(stream.ReadAs() == 0x02, false, "projectcodepage_size"); + auto codePage = stream.ReadAs(); // TODO: what to do with the codec? + + CHECK(stream.ReadAs() == 0x04, false, "projectname_id"); + auto projectName_size = stream.ReadAs(); + CHECK(projectName_size >= 1 && projectName_size <= 128, false, "projectname_size"); + String projectName(stream.Read(projectName_size)); + + CHECK(stream.ReadAs() == 0x05, false, "projectdocstring_id"); + auto projectDocString_size = stream.ReadAs(); + CHECK(projectDocString_size <= 2000, false, "projectdocstring_size"); + String docstring(stream.Read(projectDocString_size)); // TODO: decode + + CHECK(stream.ReadAs() == 0x40, false, "reserved"); + auto projectDocStringUnicode_size = stream.ReadAs(); + CHECK(projectDocStringUnicode_size % 2 == 0, false, "projectDocStringUnicode_size"); + UnicodeStringBuilder projectDocStringUnicode(stream.Read(projectDocStringUnicode_size)); // TODO: decode + + CHECK(stream.ReadAs() == 0x06, false, "helpFile1_id"); + auto helpFile1_size = stream.ReadAs(); + CHECK(helpFile1_size <= 260, false, "helpFile1_size"); + String helpFile1(stream.Read(helpFile1_size)); + CHECK(stream.ReadAs() == 0x3d, false, "reserved"); + auto helpFile2_size = stream.ReadAs(); + CHECK(helpFile2_size == helpFile1_size, false, "helpFile2_size"); + String helpFile2(stream.Read(helpFile2_size)); + for (uint32 i = 0; i < helpFile1_size; ++i) { + CHECK(helpFile1[i] == helpFile2[i], false, "helpFiles"); + } + + CHECK(stream.ReadAs() == 0x07, false, "projectHelpContext_id"); + CHECK(stream.ReadAs() == 0x04, false, "projectHelpContext_size"); + auto projectHelpContext = stream.ReadAs(); + + CHECK(stream.ReadAs() == 0x08, false, "projectLibFlags_id"); + CHECK(stream.ReadAs() == 0x04, false, "projectLibFlags_size"); + CHECK(stream.ReadAs() == 0x00, false, "projectLibFlags"); + + CHECK(stream.ReadAs() == 0x09, false, "projectVersoin_id"); + CHECK(stream.ReadAs() == 0x04, false, "reserved"); + auto versionMajor = stream.ReadAs(); + auto versionMinor = stream.ReadAs(); + + CHECK(stream.ReadAs() == 0x0c, false, "projectConstants_id"); + auto projectConstants_size = stream.ReadAs(); + CHECK(projectConstants_size <= 1015, false, "projectConstants_size"); + + String constants(stream.Read(projectConstants_size)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x3c, false, "reserved"); + + auto projectConstantsUnicode_size = stream.ReadAs(); + CHECK(projectConstantsUnicode_size % 2 == 0, false, "projectConstantsUnicode_size"); + UnicodeStringBuilder constantsUnicode(stream.Read(projectConstantsUnicode_size)); // TODO: decode and ABNF + + uint32 recordIndex = 0; + + // PROJECTREFERENCES + while (true) { + // NameRecord + auto referenceName_id = stream.ReadAs(); + if (referenceName_id == 0x0f) { + // end of Records array and beginning of PROJECTMODULES Record + break; + } + + CHECK(referenceName_id == 0x16, false, "referenceName_id"); + auto referenceName_size = stream.ReadAs(); + String referenceName(stream.Read(referenceName_size)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x3e, false, "reserved"); + auto referenceNameUnicode_size = stream.ReadAs(); + UnicodeStringBuilder referenceNameUnicode(stream.Read(referenceNameUnicode_size)); + + // ReferenceRecord + auto referenceRecord_type = stream.ReadAs(); + switch (referenceRecord_type) { + case 0x2f: + { + // REFERENCECONTROL Record + + REFERENCECONTROL_Record record; + record.recordIndex = recordIndex; + + stream.Seek(sizeof(uint32)); // SizeTwiddled + auto sizeOfLibidTwiddled = stream.ReadAs(); + // TODO: check string - https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-ovba/d64485fa-8562-4726-9c5e-11e8f01a81c0 + record.libidTwiddled = String(stream.Read(sizeOfLibidTwiddled)); + CHECK(stream.ReadAs() == 0x00, false, "reserved1"); + CHECK(stream.ReadAs() == 0x00, false, "reserved2"); + + check = stream.ReadAs(); + + if (check == 0x16) { + // optional NameRecordExtended + auto sizeOfName = stream.ReadAs(); + record.nameRecordExtended = String(stream.Read(sizeOfName)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x3e, false, "reserved"); + auto sizeOfNameUnicode = stream.ReadAs(); + UnicodeStringBuilder nameUnicode(stream.Read(sizeOfNameUnicode)); + check = stream.ReadAs(); + } + + CHECK(check == 0x30, false, "reserved3"); + stream.Seek(sizeof(uint32)); // SizeExtended + auto sizeOfLibidExtended = stream.ReadAs(); + record.libidExtended = String(stream.Read(sizeOfLibidExtended)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x00, false, "reserved4"); + CHECK(stream.ReadAs() == 0x00, false, "reserved5"); + record.originalTypeLib = BufferView(stream.Read(16)); + record.cookie = stream.ReadAs(); + + break; + } + case 0x33: { + // REFERENCEORIGINAL Record + + REFERENCEORIGINAL_Record record; + record.recordIndex = recordIndex; + + auto sizeOfLibidOriginal = stream.ReadAs(); + record.libidOriginal = String(stream.Read(sizeOfLibidOriginal)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x2f, false, "referenceControl_id"); + + stream.Seek(sizeof(uint32)); // SizeTwiddled + auto sizeOfLibidTwiddled = stream.ReadAs(); + record.referenceControl.libidTwiddled = String(stream.Read(sizeOfLibidTwiddled)); + CHECK(stream.ReadAs() == 0x00, false, "reserved1"); + CHECK(stream.ReadAs() == 0x00, false, "reserved2"); + + check = stream.ReadAs(); + + if (check == 0x16) { + // optional NameRecordExtended + auto sizeOfName = stream.ReadAs(); + record.referenceControl.nameRecordExtended = String(stream.Read(sizeOfName)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x3e, false, "reserved"); + auto sizeOfNameUnicode = stream.ReadAs(); + UnicodeStringBuilder nameUnicode(stream.Read(sizeOfNameUnicode)); + check = stream.ReadAs(); + } + + CHECK(check == 0x30, false, "reserved3"); + stream.Seek(sizeof(uint32)); // SizeExtended + auto sizeOfLibidExtended = stream.ReadAs(); + record.referenceControl.libidExtended = String(stream.Read(sizeOfLibidExtended)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x00, false, "reserved4"); + CHECK(stream.ReadAs() == 0x00, false, "reserved5"); + record.referenceControl.originalTypeLib = BufferView(stream.Read(16)); + record.referenceControl.cookie = stream.ReadAs(); + + break; + } + case 0x0d: { + // REFERENCEREGISTERED Record + + REFERENCEREGISTERED_Record record; + record.recordIndex = recordIndex; + + stream.Seek(sizeof(uint32)); // ignored Size + + auto sizeOfLibid = stream.ReadAs(); + record.libid = String(stream.Read(sizeOfLibid)); // TODO: decode and ABNF + + CHECK(stream.ReadAs() == 0x00, false, "reserved1"); + CHECK(stream.ReadAs() == 0x00, false, "reserved2"); + + break; + } + case 0x0e: { + // REFERENCEPROJECT Record + + REFERENCEPROJECT_Record record; + record.recordIndex = recordIndex; + + stream.Seek(sizeof(uint32)); // ignored Size + auto sizeOfLibidAbsolute = stream.ReadAs(); + record.libidAbsolute = String(stream.Read(sizeOfLibidAbsolute)); // TODO: decode and ABNF + auto sizeOfLibidRelative = stream.ReadAs(); + record.libidRelative = String(stream.Read(sizeOfLibidRelative)); // TODO: decode and ABNF + + record.majorVersion = stream.ReadAs(); + record.minorVersion = stream.ReadAs(); + + break; + } + default: + return false; + } + + recordIndex++; + } + + // PROJECTMODULES + CHECK(stream.ReadAs() == 0x02, false, "size"); + auto modulesCount = stream.ReadAs(); + CHECK(stream.ReadAs() == 0x13, false, "projectCookie_id"); + CHECK(stream.ReadAs() == 0x02, false, "projectCookie_size"); + stream.Seek(sizeof(uint16)); // ignored Cookie + + std::vector moduleRecords(modulesCount); + + // array of MODULE records + for (uint32 moduleIndex = 0; moduleIndex < modulesCount; ++moduleIndex) { + // TODO: check this - MUST have a corresponding specified in PROJECT Stream + + MODULE_Record& moduleRecord = moduleRecords[moduleIndex]; + + CHECK(stream.ReadAs() == 0x19, false, "moduleName_id"); + auto sizeOfModuleName = stream.ReadAs(); + // TODO: decode and ABNF + moduleRecord.moduleName = String(stream.Read(sizeOfModuleName)); + + CHECK(stream.ReadAs() == 0x47, false, "moduleNameUnicode_id"); + auto sizeOfModuleNameUnicode = stream.ReadAs(); + CHECK(sizeOfModuleNameUnicode % 2 == 0, false, "sizeOfModuleNameUnicode"); + UnicodeStringBuilder moduleNameUnicode(stream.Read(sizeOfModuleNameUnicode)); // TODO: decode and ABNF + + CHECK(stream.ReadAs() == 0x1a, false, "moduleStreamName_id"); + auto sizeOfStreamName = stream.ReadAs(); + moduleRecord.streamName = String(stream.Read(sizeOfStreamName)); // TODO: decode and ABNF + CHECK(stream.ReadAs() == 0x32, false, "reserved"); + + auto sizeOfStreamNameUnicode = stream.ReadAs(); + CHECK(sizeOfStreamNameUnicode % 2 == 0, false, "sizeOfStreamNameUnicode"); + String streamNameUnicode(stream.Read(sizeOfStreamNameUnicode)); // TODO: decode and ABNF + + CHECK(stream.ReadAs() == 0x1c, false, "moduleDocString_id"); + auto sizeOfDocString = stream.ReadAs(); + // TODO: decode and ABNF + moduleRecord.docString = String(stream.Read(sizeOfDocString)); + CHECK(stream.ReadAs() == 0x48, false, "reserved"); + auto sizeOfDocStringUnicode = stream.ReadAs(); + CHECK(sizeOfDocStringUnicode % 2 == 0, false, "sizeOfDocStringUnicode"); + UnicodeStringBuilder docStringUnicode(stream.Read(sizeOfDocStringUnicode)); + + CHECK(stream.ReadAs() == 0x31, false, "moduleOffset_id"); + CHECK(stream.ReadAs() == 0x04, false, "moduleOffset_size"); + moduleRecord.textOffset = stream.ReadAs(); + + CHECK(stream.ReadAs() == 0x1e, false, "moduleHelpContext_id"); + CHECK(stream.ReadAs() == 0x04, false, "moduleHelpContext_size"); + moduleRecord.helpContext = stream.ReadAs(); + + CHECK(stream.ReadAs() == 0x2c, false, "moduleCookie_id"); + CHECK(stream.ReadAs() == 0x02, false, "moduleCookie_size"); + stream.Seek(sizeof(uint16)); // ignored Cookie + + auto moduleType_id = stream.ReadAs(); + CHECK(moduleType_id == 0x21 || moduleType_id == 0x22, false, "moduleType_id"); + stream.Seek(sizeof(uint32)); // ignored Reserved + + check = stream.ReadAs(); + if (check == 0x25) { + // optional MODULEREADONLY + stream.Seek(sizeof(uint32)); // ignored Reserved + check = stream.ReadAs(); + } + + if (check == 0x28) { + // optional MODULEPRIVATE + stream.Seek(sizeof(uint32)); // ignored Reserved + check = stream.ReadAs(); + } + + auto terminator = check; + CHECK(terminator == 0x2b, false, "terminator"); + stream.Seek(sizeof(uint32)); // ignored Reserved + } + + CHECK(stream.ReadAs() == 0x10, false, "terminator"); + stream.Seek(sizeof(uint32)); // ignored Reserved + + CHECK(stream.GetCursor() == stream.GetSize(), false, "buffer still available"); + return true; +} + +bool ParseModuleStream(BufferView bv) +{ + constexpr size_t moduleTextOffset = 2607; // TODO: in the future get this from the parsed dir stream + + ByteStream stream((void*) bv.GetData(), bv.GetLength()); + + stream.Seek(moduleTextOffset); + + auto compressed = stream.Read(stream.GetSize() - stream.GetCursor()); + + Buffer decompressed; + + DecompressStream(compressed, decompressed); + return true; +} + +bool ParseVBAProject(BufferView bv) +{ + ByteStream stream((void*) bv.GetData(), bv.GetLength()); + + // TODO: extract in outer stuff + constexpr uint8 headerSignature[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; + for (uint32 i = 0; i < ARRAY_LEN(headerSignature); ++i) { + CHECK(stream.ReadAs() == headerSignature[i], false, "headerSignature"); + } + + CHECK(stream.ReadAs() == 0, false, "headerCLSID"); + CHECK(stream.ReadAs() == 0, false, "headerCLSID"); + + auto minorVersion = stream.ReadAs(); // TODO: This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004. + auto majorVersion = stream.ReadAs(); + CHECK(majorVersion == 0x03 || majorVersion == 0x04, false, "majorVersion"); + + CHECK(stream.ReadAs() == 0xfffe, false, "byteOrder"); + auto sectorShift = stream.ReadAs(); + CHECK((majorVersion == 0x03 && sectorShift == 0x09) || (majorVersion == 0x04 && sectorShift == 0x0c), false, "sectorShift"); + uint16 sectorSize = 1 << sectorShift; + + auto miniSectorShift = stream.ReadAs(); + CHECK(miniSectorShift == 0x06, false, "miniSectorShift"); + uint16 miniSectorSize = 1 << miniSectorShift; + + CHECK(stream.ReadAs() == 0x00, false, "reserved"); + CHECK(stream.ReadAs() == 0x00, false, "reserved"); + + auto numberOfDirectorySectors = stream.ReadAs(); + if (majorVersion == 0x03) { + CHECK(numberOfDirectorySectors == 0x00, false, "numberOfDirectorySectors"); + } + + auto numberOfFatSectors = stream.ReadAs(); + auto firstDirectorySectorLocation = stream.ReadAs(); + auto transactionSignatureNumber = stream.ReadAs(); // incremented every time the file is saved + + CHECK(stream.ReadAs() == 0x1000, false, "miniStreamCutoffSize"); + + auto firstMiniFatSectorLocation = stream.ReadAs(); + auto numberOfMiniFatSectors = stream.ReadAs(); + auto firstDifatSectorLocation = stream.ReadAs(); + auto numberOfDifatSectors = stream.ReadAs(); + + constexpr size_t locationsCount = 109; + uint32 DIFAT[locationsCount]; // the first 109 FAT sector locations of the compound file + { + auto difatBv = stream.Read(locationsCount * sizeof(*DIFAT)); + memcpy(DIFAT, (void*) difatBv.GetData(), difatBv.GetLength()); + } + + if (majorVersion == 0x04) { + stream.Seek(3584); // TODO: MUST check if they are all zeros + } + + { // TODO: remove + std::ofstream out("D:\\temp\\ceva", std::ios::binary | std::ios::trunc); + auto temp = stream.Read(stream.GetSize()); + out.write((const char*) temp.GetData(), temp.GetLength()); + } + + uint16 actualNumberOfSectors = ((bv.GetLength() + sectorSize - 1) / sectorSize) - 1; + + // load FAT + + Buffer fat; + + for (size_t locationIndex = 0; locationIndex < locationsCount; ++locationIndex) { + uint32 sect = DIFAT[locationIndex]; + if (sect == ENDOFCHAIN || sect == FREESECT) { + // end of sector chain + break; + } + + // get the sector data + size_t byteOffset = sectorSize * (sect + 1); + BufferView sector(bv.GetData() + byteOffset, sectorSize); + fat.Add(sector); + } + + if (fat.GetLength() > actualNumberOfSectors * sizeof(uint32)) { + fat.Resize(actualNumberOfSectors * sizeof(uint32)); + } + + // load directory + + BufferView view = fat; + + auto left = stream.GetSize() - stream.GetCursor(); + + return true; +} + +bool DOCFile::ProcessData() +{ + BufferView bv = obj->GetData().GetEntireFile(); + + ////// decompress the "dir" stream + ////Buffer decompressed; + ////DecompressStream(bv, decompressed); + + ////// parse the decompressed dir stream + ////ParseUncompressedDirStream(decompressed); + + //// parse a module file + //ParseModuleStream(bv); + + // TODO: parse the compound file binary format + + ParseVBAProject(bv); + + return true; +} + +bool DOCFile::BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) +{ + return true; +} + +bool DOCFile::PopulateItem(AppCUI::Controls::TreeViewItem item) +{ + return false; +} + +void DOCFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) +{ +} +} // namespace GView::Type::DOC diff --git a/Types/DOC/src/PanelInformation.cpp b/Types/DOC/src/PanelInformation.cpp new file mode 100644 index 00000000..977c5474 --- /dev/null +++ b/Types/DOC/src/PanelInformation.cpp @@ -0,0 +1,46 @@ +#include "doc.hpp" + +using namespace GView::Type::DOC; +using namespace AppCUI::Controls; + +Panels::Information::Information(Reference _doc) : TabPage("&Information") +{ + doc = _doc; + general = Factory::ListView::Create(this, "x:0,y:0,w:100%,h:10", { "n:Field,w:12", "n:Value,w:100" }, ListViewFlags::None); + headers = Factory::ListView::Create(this, "x:0,y:10,w:100%,h:20", { "n:Field,w:12", "n:Value,w:10000" }, ListViewFlags::None); + + this->Update(); +} +void Panels::Information::UpdateGeneralInformation() +{ + general->DeleteAllItems(); + + general->AddItem("File"); + // size + { + LocalString<256> tempStr; + auto sizeString = NumericFormatter().ToString(doc->obj->GetData().GetSize(), { NumericFormatFlags::None, 10, 3, ',' }).data(); + auto value = tempStr.Format("%s bytes", sizeString); + general->AddItem({ "Size", value }); + } +} + +void Panels::Information::UpdateIssues() +{ +} +void Panels::Information::RecomputePanelsPositions() +{ + int w = this->GetWidth(); + int h = this->GetHeight(); + + if (!general.IsValid()) + return; + + this->general->Resize(w, h); +} +void Panels::Information::Update() +{ + UpdateGeneralInformation(); + UpdateIssues(); + RecomputePanelsPositions(); +} diff --git a/Types/DOC/src/doc.cpp b/Types/DOC/src/doc.cpp new file mode 100644 index 00000000..11e44c38 --- /dev/null +++ b/Types/DOC/src/doc.cpp @@ -0,0 +1,81 @@ +#include "doc.hpp" +#include +#include +#include + +using namespace AppCUI; +using namespace AppCUI::Utils; +using namespace AppCUI::Application; +using namespace AppCUI::Controls; +using namespace GView::Utils; +using namespace GView::Type; +using namespace GView; +using namespace GView::View; + + +constexpr string_view DOC_ICON = "1111111111111111" // 5 + "1wwwwwwwww111111" // 6 + "1w11111111w11111" // 7 + "1w111111111w1111" // 8 + "1w1111111111w111" // 9 + "1w11111111111w11" // 9 + "1w111111111111w1" // 9 + "1w1www1www1111w1" // 9 + "1w111111111111w1" // 10 + "1w1wwwww1wwww1w1" // 11 + "1w111111111111w1" // 12 + "1w1wwwwwwwww11w1" // 12 + "1w111111111111w1" // 13 + "1w111111111111w1" // 14 + "1wwwwwwwwwwwwww1" // 15 + "1111111111111111"; // 16 + +void CreateContainerView(Reference win, Reference doc) +{ + ContainerViewer::Settings settings; + + settings.SetIcon(DOC_ICON); + settings.SetColumns({ + "n:&Index,a:r,w:50", + }); + + settings.SetEnumerateCallback(win->GetObject()->GetContentType().ToObjectRef()); + settings.SetOpenItemCallback(win->GetObject()->GetContentType().ToObjectRef()); + + win->CreateViewer(settings); +} + +extern "C" { +PLUGIN_EXPORT bool Validate(const AppCUI::Utils::BufferView& buf, const std::string_view& extension) +{ + return true; +} +PLUGIN_EXPORT TypeInterface* CreateInstance() +{ + return new DOC::DOCFile(); +} + +PLUGIN_EXPORT bool PopulateWindow(Reference win) +{ + auto doc = win->GetObject()->GetContentType(); + + // TODO: check return value + doc->ProcessData(); + + CreateContainerView(win, doc); + win->AddPanel(Pointer(new DOC::Panels::Information(doc)), true); + + return true; +} +PLUGIN_EXPORT void UpdateSettings(IniSection sect) +{ + sect["Extension"] = { "doc" }; + sect["Priority"] = 1; + sect["Description"] = "Document (*.doc)"; +} +} + +int main() +{ + return 0; +} From 855d7ad218298617253814a8a2b8cc19b30f110e Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Mon, 3 Jun 2024 14:31:50 +0300 Subject: [PATCH 06/20] Add support for extracting a stream from the vbaProject.bin file --- CMakeLists.txt | 1 + Types/DOC/include/doc.hpp | 3 +- Types/DOC/src/ByteStream.cpp | 3 +- Types/DOC/src/DOCFile.cpp | 207 ++++++++++++++++++++++++++++++++--- 4 files changed, 197 insertions(+), 17 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 94475953..f03d9809 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,6 +198,7 @@ if(NOT DEFINED CMAKE_TESTING_ENABLED) add_subdirectory(Types/SQLite) add_subdirectory(Types/JCLASS) add_subdirectory(Types/EML) + add_subdirectory(Types/DOC) # Generic plugins supported by GView add_subdirectory(GenericPlugins/CharacterTable) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index fb7cceaf..8eb60dd1 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -22,6 +22,7 @@ namespace Type public: ByteStream(void* ptr, size_t size) : ptr(ptr), size(size), cursor(0) {}; + ByteStream(BufferView view) : ptr((void*) view.GetData()), size(view.GetLength()), cursor(0) {}; BufferView Read(size_t count); template T ReadAs() { @@ -34,7 +35,7 @@ namespace Type return value; } - void Seek(size_t count); + ByteStream& Seek(size_t count); size_t GetCursor() { return cursor; diff --git a/Types/DOC/src/ByteStream.cpp b/Types/DOC/src/ByteStream.cpp index f6deb1bc..e964a32c 100644 --- a/Types/DOC/src/ByteStream.cpp +++ b/Types/DOC/src/ByteStream.cpp @@ -16,10 +16,11 @@ BufferView ByteStream::Read(size_t count) return view; } -void ByteStream::Seek(size_t count) +ByteStream& ByteStream::Seek(size_t count) { if (cursor + count > size) { count = size - cursor; } cursor += count; + return *this; } diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 7c9fdc79..c2b17594 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -10,6 +10,7 @@ using namespace GView::View::LexicalViewer; #define FREESECT 0xffffffff #define FATSECT 0xfffffffd #define DIFSECT 0xfffffffc +#define NOSTREAM 0xffffffff DOCFile::DOCFile() @@ -449,11 +450,165 @@ bool ParseModuleStream(BufferView bv) return true; } + +#pragma pack(1) +struct CFDirEntry_Data { + uint8 nameUnicode[64]; // the structure starts from here + uint16 nameLength; + uint8 objectType; + uint8 colorFlag; // 0x00 (red) or 0x01 (black) + uint32 leftSiblingId; + uint32 rightSiblingId; + uint32 childId; + uint8 clsid[16]; + uint32 stateBits; + uint64 creationTime; + uint64 modifiedTime; + uint32 startingSectorLocation; + uint64 streamSize; +}; + + +// TODO: move to another file +class CFDirEntry +{ + private: + void AppendChildren(uint32 childId) + { + if (childId == NOSTREAM) { + return; + } + + CFDirEntry child(directoryData, childId); + + AppendChildren(child.data.leftSiblingId); + size_t childIndex = children.size(); + children.emplace_back(); + AppendChildren(child.data.rightSiblingId); + + child.BuildStorageTree(); + + children[childIndex] = child; + }; + + public: + CFDirEntry() {}; + CFDirEntry(BufferView _directoryData, uint32 _entryId) + { + Load(_directoryData, _entryId); + }; + + bool Load(BufferView _directoryData, uint32 _entryId) + { + CHECK(!initialized, false, "already initialized"); + initialized = true; + + directoryData = _directoryData; + entryId = _entryId; + data = ByteStream(directoryData).Seek(entryId * 128).ReadAs(); + + CHECK(data.nameLength % 2 == 0, false, "nameLength"); + CHECK(data.objectType == 0x00 || data.objectType == 0x01 || data.objectType == 0x02 || data.objectType == 0x05, false, "objectType"); + CHECK(data.colorFlag == 0x00 || data.colorFlag == 0x01, false, "colorFlag"); + + return true; + } + + void BuildStorageTree() + { + if (data.childId == NOSTREAM) { + return; + } + + // add children + AppendChildren(data.childId); + } + + bool FindChildByName(std::u16string_view entryName, CFDirEntry& entry) + { + for (CFDirEntry& child : children) { + std::u16string_view childName((char16_t*) child.data.nameUnicode, child.data.nameLength / 2 - 1); + if (!entryName.starts_with(childName)) { + continue; + } + + auto pos = entryName.find_first_of(u'/'); + if (pos == std::u16string::npos) { + entry = child; + return true; + } else { + std::u16string_view newEntryName = entryName.substr(pos + 1); + return child.FindChildByName(newEntryName, entry); + } + } + return false; + } + + private: + BufferView directoryData; + bool initialized = false; + + public: + uint32 entryId{}; + CFDirEntry_Data data{}; + std::vector children; +}; + + +Buffer OpenCFStream(BufferView bv, BufferView fat, uint32 sect, uint16 sectorSize, uint32 size, uint32 offset) +{ + Buffer data; + uint16 actualNumberOfSectors = ((size + sectorSize - 1) / sectorSize); + for (uint32 i = 0; i < actualNumberOfSectors; ++i) { + if (sect == ENDOFCHAIN) { + // end of sector chain + break; + } + + BufferView sectorData(bv.GetData() + offset + sectorSize * sect, sectorSize); + data.Add(sectorData); + + if (sect >= fat.GetLength()) { + return Buffer(); + } + sect = *(((uint32*) fat.GetData()) + sect); // get the next sect + } + + if (data.GetLength() > size) { + data.Resize(size); + } + + return data; +} + + +void DisplayAllVBAProjectFiles(CFDirEntry& entry, uint32 miniStreamCutoffSize, BufferView bv, BufferView fat, uint32 sectorSize, BufferView miniStream, BufferView miniFat, uint32 miniSectorSize) +{ + auto type = entry.data.objectType; + char16* name = (char16*) entry.data.nameUnicode; + + if (type == 0x02) { + Buffer entryBuffer; + + if (entry.data.streamSize < miniStreamCutoffSize) { + entryBuffer = OpenCFStream(miniStream, miniFat, entry.data.startingSectorLocation, miniSectorSize, entry.data.streamSize, 0); + } else { + entryBuffer = OpenCFStream(bv, fat, entry.data.startingSectorLocation, sectorSize, entry.data.streamSize, sectorSize); + } + + GView::App::OpenBuffer(entryBuffer, name, "", GView::App::OpenMethod::BestMatch, "bin"); + } + + for (auto& child : entry.children) { + DisplayAllVBAProjectFiles(child, miniStreamCutoffSize, bv, fat, sectorSize, miniStream, miniFat, miniSectorSize); + } +} + + bool ParseVBAProject(BufferView bv) { ByteStream stream((void*) bv.GetData(), bv.GetLength()); - // TODO: extract in outer stuff constexpr uint8 headerSignature[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; for (uint32 i = 0; i < ARRAY_LEN(headerSignature); ++i) { CHECK(stream.ReadAs() == headerSignature[i], false, "headerSignature"); @@ -487,7 +642,8 @@ bool ParseVBAProject(BufferView bv) auto firstDirectorySectorLocation = stream.ReadAs(); auto transactionSignatureNumber = stream.ReadAs(); // incremented every time the file is saved - CHECK(stream.ReadAs() == 0x1000, false, "miniStreamCutoffSize"); + auto miniStreamCutoffSize = stream.ReadAs(); + CHECK(miniStreamCutoffSize == 0x1000, false, "miniStreamCutoffSize"); auto firstMiniFatSectorLocation = stream.ReadAs(); auto numberOfMiniFatSectors = stream.ReadAs(); @@ -502,21 +658,15 @@ bool ParseVBAProject(BufferView bv) } if (majorVersion == 0x04) { - stream.Seek(3584); // TODO: MUST check if they are all zeros - } - - { // TODO: remove - std::ofstream out("D:\\temp\\ceva", std::ios::binary | std::ios::trunc); - auto temp = stream.Read(stream.GetSize()); - out.write((const char*) temp.GetData(), temp.GetLength()); + // check if the next 3584 bytes are 0 + uint32 zeroCheckIndex = 3584; + while (zeroCheckIndex--) { + CHECK(stream.ReadAs() == 0x00, false, "zeroCheck"); + } } - uint16 actualNumberOfSectors = ((bv.GetLength() + sectorSize - 1) / sectorSize) - 1; - // load FAT - Buffer fat; - for (size_t locationIndex = 0; locationIndex < locationsCount; ++locationIndex) { uint32 sect = DIFAT[locationIndex]; if (sect == ENDOFCHAIN || sect == FREESECT) { @@ -530,15 +680,42 @@ bool ParseVBAProject(BufferView bv) fat.Add(sector); } + uint16 actualNumberOfSectors = ((bv.GetLength() + sectorSize - 1) / sectorSize) - 1; if (fat.GetLength() > actualNumberOfSectors * sizeof(uint32)) { fat.Resize(actualNumberOfSectors * sizeof(uint32)); } // load directory + Buffer directoryData = OpenCFStream(bv, fat, firstDirectorySectorLocation, sectorSize, bv.GetLength(), sectorSize); + + // parse dir entries + // start with root entry + + CFDirEntry root(directoryData, 0); + root.BuildStorageTree(); + + uint32 streamSize = numberOfMiniFatSectors * sectorSize; + uint16 actualNumberOfMinisectors = (root.data.streamSize + miniSectorSize - 1) / miniSectorSize; + + // load miniFat + Buffer miniFat = OpenCFStream(bv, fat, firstMiniFatSectorLocation, sectorSize, streamSize, sectorSize); // will be interpreted as uint32* + if (miniFat.GetLength() > actualNumberOfMinisectors * sizeof(uint32)) { + miniFat.Resize(actualNumberOfMinisectors * sizeof(uint32)); + } + + // load ministream + uint32 miniStreamSize = root.data.streamSize; + Buffer miniStream = OpenCFStream(bv, fat, root.data.startingSectorLocation, sectorSize, miniStreamSize, sectorSize); + + // find file + + //CFDirEntry found; + //CHECK(root.FindChildByName(u"The VBA Project/_VBA_Project/VBA/ThisTerminal", found), false, ""); + //Buffer foundData = OpenCFStream(miniStream, miniFat, found.data.startingSectorLocation, miniSectorSize, found.data.streamSize, 0); - BufferView view = fat; + //ParseModuleStream(foundData); - auto left = stream.GetSize() - stream.GetCursor(); + DisplayAllVBAProjectFiles(root, miniStreamCutoffSize, bv, fat, sectorSize, miniStream, miniFat, miniSectorSize); return true; } From 2961042c7174c3048e20f4f9d04c5786e2341768 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Mon, 3 Jun 2024 18:21:38 +0300 Subject: [PATCH 07/20] Refactor code and parse all VBA modules --- Types/DOC/include/doc.hpp | 113 ++++++++++++++ Types/DOC/src/CFDirEntry.cpp | 82 ++++++++++ Types/DOC/src/DOCFile.cpp | 287 +++++++++++------------------------ 3 files changed, 284 insertions(+), 198 deletions(-) create mode 100644 Types/DOC/src/CFDirEntry.cpp diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index 8eb60dd1..9b4fc09d 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -2,6 +2,9 @@ #include "GView.hpp" +#define NOSTREAM 0xffffffff + + namespace GView { namespace Type @@ -47,11 +50,109 @@ namespace Type } }; + + // TODO: move to another file + + #pragma pack(1) + struct CFDirEntry_Data { + uint8 nameUnicode[64]; // the structure starts from here + uint16 nameLength; + uint8 objectType; + uint8 colorFlag; // 0x00 (red) or 0x01 (black) + uint32 leftSiblingId; + uint32 rightSiblingId; + uint32 childId; + uint8 clsid[16]; + uint32 stateBits; + uint64 creationTime; + uint64 modifiedTime; + uint32 startingSectorLocation; + uint64 streamSize; + }; + + class CFDirEntry + { + private: + void AppendChildren(uint32 childId); + + public: + CFDirEntry(); + CFDirEntry(BufferView _directoryData, uint32 _entryId); + + bool Load(BufferView _directoryData, uint32 _entryId); + void BuildStorageTree(); + bool FindChildByName(std::u16string_view entryName, CFDirEntry& entry); + + private: + BufferView directoryData; + bool initialized = false; + + public: + uint32 entryId{}; + CFDirEntry_Data data{}; + std::vector children; + }; + + // REFERENCE records + struct REFERENCECONTROL_Record { + uint32 recordIndex; + String libidTwiddled; + String nameRecordExtended; + String libidExtended; + BufferView originalTypeLib; + uint32 cookie; + }; + + struct REFERENCEORIGINAL_Record { + uint32 recordIndex; + String libidOriginal; + REFERENCECONTROL_Record referenceControl; + }; + + struct REFERENCEREGISTERED_Record { + uint32 recordIndex; + String libid; + }; + + struct REFERENCEPROJECT_Record { + uint32 recordIndex; + String libidAbsolute; + String libidRelative; + uint32 majorVersion; + uint16 minorVersion; + }; + + struct MODULE_Record { + String moduleName; + String streamName; + String docString; + uint32 textOffset; + uint32 helpContext; + }; + class DOCFile : public TypeInterface, public View::ContainerViewer::EnumerateInterface, public View::ContainerViewer::OpenItemInterface { private: friend class Panels::Information; + // compound files (vbaProject.bin) helper member variables + AppCUI::Utils::Buffer vbaProjectBuffer; + AppCUI::Utils::Buffer FAT; + AppCUI::Utils::Buffer miniStream; + AppCUI::Utils::Buffer miniFAT; + + uint16 sectorSize{}; + uint16 miniSectorSize{}; + uint16 miniStreamCutoffSize{}; + + // VBA streams helper member variables + std::vector referenceControlRecords; + std::vector referenceOriginalRecords; + std::vector referenceRegisteredRecords; + std::vector referenceProjectRecords; + + std::vector moduleRecords; + public: DOCFile(); virtual ~DOCFile() override @@ -93,6 +194,18 @@ namespace Type // View::ContainerViewer::OpenItemInterface virtual void OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) override; + + // compound files (vbaProject.bin) helper methods + bool ParseVBAProject(); + Buffer OpenCFStream(const CFDirEntry& entry); + Buffer OpenCFStream(uint32 sect, uint32 size, bool useMiniFAT); + + void DisplayAllVBAProjectFiles(CFDirEntry& entry); + + // VBA streams helper methods + bool DecompressStream(BufferView bv, Buffer& decompressed); + bool ParseUncompressedDirStream(BufferView bv); + bool ParseModuleStream(BufferView bv, MODULE_Record moduleRecord); }; namespace Panels diff --git a/Types/DOC/src/CFDirEntry.cpp b/Types/DOC/src/CFDirEntry.cpp new file mode 100644 index 00000000..80b30f67 --- /dev/null +++ b/Types/DOC/src/CFDirEntry.cpp @@ -0,0 +1,82 @@ +#include "doc.hpp" + + +using namespace GView::Type::DOC; + + +CFDirEntry::CFDirEntry() +{ +} + +CFDirEntry::CFDirEntry(BufferView _directoryData, uint32 _entryId) +{ + Load(_directoryData, _entryId); +} + +void CFDirEntry::AppendChildren(uint32 childId) +{ + if (childId == NOSTREAM) { + return; + } + + CFDirEntry child(directoryData, childId); + + AppendChildren(child.data.leftSiblingId); + size_t childIndex = children.size(); + children.emplace_back(); + AppendChildren(child.data.rightSiblingId); + + child.BuildStorageTree(); + + children[childIndex] = child; +}; + + +bool CFDirEntry::Load(BufferView _directoryData, uint32 _entryId) +{ + CHECK(!initialized, false, "already initialized"); + initialized = true; + + directoryData = _directoryData; + entryId = _entryId; + data = ByteStream(directoryData).Seek(entryId * 128).ReadAs(); + + CHECK(data.nameLength % 2 == 0, false, "nameLength"); + CHECK(data.objectType == 0x00 || data.objectType == 0x01 || data.objectType == 0x02 || data.objectType == 0x05, false, "objectType"); + CHECK(data.colorFlag == 0x00 || data.colorFlag == 0x01, false, "colorFlag"); + + return true; +} + + +void CFDirEntry::BuildStorageTree() +{ + if (data.childId == NOSTREAM) { + return; + } + + // add children + AppendChildren(data.childId); +} + + +bool CFDirEntry::FindChildByName(std::u16string_view entryName, CFDirEntry& entry) +{ + for (CFDirEntry& child : children) { + std::u16string_view childName((char16_t*) child.data.nameUnicode, child.data.nameLength / 2 - 1); + if (!entryName.starts_with(childName)) { + continue; + } + + auto pos = entryName.find_first_of(u'/'); + if (pos == std::u16string::npos) { + entry = child; + return true; + } else { + std::u16string_view newEntryName = entryName.substr(pos + 1); + return child.FindChildByName(newEntryName, entry); + } + } + return false; +} + diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index c2b17594..2df78aef 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -10,7 +10,6 @@ using namespace GView::View::LexicalViewer; #define FREESECT 0xffffffff #define FATSECT 0xfffffffd #define DIFSECT 0xfffffffc -#define NOSTREAM 0xffffffff DOCFile::DOCFile() @@ -18,7 +17,7 @@ DOCFile::DOCFile() } -bool DecompressStream(BufferView bv, Buffer& decompressed) +bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) { // TODO: document the compression algorithm and expose it into the core @@ -91,55 +90,13 @@ bool DecompressStream(BufferView bv, Buffer& decompressed) } } - BufferView view(decompressed.GetData(), decompressed.GetLength()); - - GView::App::OpenBuffer(decompressed, "decompressed", "", GView::App::OpenMethod::BestMatch, "bin"); - return true; } enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; -struct REFERENCECONTROL_Record { - uint32 recordIndex; - String libidTwiddled; - String nameRecordExtended; - String libidExtended; - BufferView originalTypeLib; - uint32 cookie; -}; - -struct REFERENCEORIGINAL_Record { - uint32 recordIndex; - String libidOriginal; - REFERENCECONTROL_Record referenceControl; -}; - -struct REFERENCEREGISTERED_Record { - uint32 recordIndex; - String libid; -}; - -struct REFERENCEPROJECT_Record { - uint32 recordIndex; - String libidAbsolute; - String libidRelative; - uint32 majorVersion; - uint16 minorVersion; -}; - - -struct MODULE_Record { - String moduleName; - String streamName; - String docString; - uint32 textOffset; - uint32 helpContext; -}; - - -bool ParseUncompressedDirStream(BufferView bv) +bool DOCFile::ParseUncompressedDirStream(BufferView bv) { ByteStream stream((void*) bv.GetData(), bv.GetLength()); uint16 check; @@ -238,7 +195,7 @@ bool ParseUncompressedDirStream(BufferView bv) { // REFERENCECONTROL Record - REFERENCECONTROL_Record record; + auto& record = referenceControlRecords.emplace_back(); record.recordIndex = recordIndex; stream.Seek(sizeof(uint32)); // SizeTwiddled @@ -274,7 +231,7 @@ bool ParseUncompressedDirStream(BufferView bv) case 0x33: { // REFERENCEORIGINAL Record - REFERENCEORIGINAL_Record record; + auto& record = referenceOriginalRecords.emplace_back(); record.recordIndex = recordIndex; auto sizeOfLibidOriginal = stream.ReadAs(); @@ -313,7 +270,7 @@ bool ParseUncompressedDirStream(BufferView bv) case 0x0d: { // REFERENCEREGISTERED Record - REFERENCEREGISTERED_Record record; + auto& record = referenceRegisteredRecords.emplace_back(); record.recordIndex = recordIndex; stream.Seek(sizeof(uint32)); // ignored Size @@ -329,7 +286,7 @@ bool ParseUncompressedDirStream(BufferView bv) case 0x0e: { // REFERENCEPROJECT Record - REFERENCEPROJECT_Record record; + auto& record = referenceProjectRecords.emplace_back(); record.recordIndex = recordIndex; stream.Seek(sizeof(uint32)); // ignored Size @@ -357,13 +314,12 @@ bool ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x02, false, "projectCookie_size"); stream.Seek(sizeof(uint16)); // ignored Cookie - std::vector moduleRecords(modulesCount); - // array of MODULE records for (uint32 moduleIndex = 0; moduleIndex < modulesCount; ++moduleIndex) { // TODO: check this - MUST have a corresponding specified in PROJECT Stream - MODULE_Record& moduleRecord = moduleRecords[moduleIndex]; + // TODO: preallocate them based on modulesCount + MODULE_Record& moduleRecord = moduleRecords.emplace_back(); CHECK(stream.ReadAs() == 0x19, false, "moduleName_id"); auto sizeOfModuleName = stream.ReadAs(); @@ -434,11 +390,11 @@ bool ParseUncompressedDirStream(BufferView bv) return true; } -bool ParseModuleStream(BufferView bv) +bool DOCFile::ParseModuleStream(BufferView bv, MODULE_Record moduleRecord) { - constexpr size_t moduleTextOffset = 2607; // TODO: in the future get this from the parsed dir stream + size_t moduleTextOffset = moduleRecord.textOffset; - ByteStream stream((void*) bv.GetData(), bv.GetLength()); + ByteStream stream(bv); stream.Seek(moduleTextOffset); @@ -447,128 +403,57 @@ bool ParseModuleStream(BufferView bv) Buffer decompressed; DecompressStream(compressed, decompressed); - return true; -} - - -#pragma pack(1) -struct CFDirEntry_Data { - uint8 nameUnicode[64]; // the structure starts from here - uint16 nameLength; - uint8 objectType; - uint8 colorFlag; // 0x00 (red) or 0x01 (black) - uint32 leftSiblingId; - uint32 rightSiblingId; - uint32 childId; - uint8 clsid[16]; - uint32 stateBits; - uint64 creationTime; - uint64 modifiedTime; - uint32 startingSectorLocation; - uint64 streamSize; -}; - - -// TODO: move to another file -class CFDirEntry -{ - private: - void AppendChildren(uint32 childId) - { - if (childId == NOSTREAM) { - return; - } - CFDirEntry child(directoryData, childId); + // TODO: de vazut tf nu merge VBA plugin + //GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::ForceType, "VBA"); - AppendChildren(child.data.leftSiblingId); - size_t childIndex = children.size(); - children.emplace_back(); - AppendChildren(child.data.rightSiblingId); + GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::BestMatch, "bin"); - child.BuildStorageTree(); - - children[childIndex] = child; - }; - - public: - CFDirEntry() {}; - CFDirEntry(BufferView _directoryData, uint32 _entryId) - { - Load(_directoryData, _entryId); - }; - - bool Load(BufferView _directoryData, uint32 _entryId) - { - CHECK(!initialized, false, "already initialized"); - initialized = true; - - directoryData = _directoryData; - entryId = _entryId; - data = ByteStream(directoryData).Seek(entryId * 128).ReadAs(); - - CHECK(data.nameLength % 2 == 0, false, "nameLength"); - CHECK(data.objectType == 0x00 || data.objectType == 0x01 || data.objectType == 0x02 || data.objectType == 0x05, false, "objectType"); - CHECK(data.colorFlag == 0x00 || data.colorFlag == 0x01, false, "colorFlag"); - - return true; - } + return true; +} - void BuildStorageTree() - { - if (data.childId == NOSTREAM) { - return; - } - // add children - AppendChildren(data.childId); - } - - bool FindChildByName(std::u16string_view entryName, CFDirEntry& entry) - { - for (CFDirEntry& child : children) { - std::u16string_view childName((char16_t*) child.data.nameUnicode, child.data.nameLength / 2 - 1); - if (!entryName.starts_with(childName)) { - continue; - } +Buffer DOCFile::OpenCFStream(const CFDirEntry& entry) +{ + auto sect = entry.data.startingSectorLocation; + auto size = entry.data.streamSize; + bool useMiniFAT = size < miniStreamCutoffSize; + + return OpenCFStream(sect, size, useMiniFAT); +} - auto pos = entryName.find_first_of(u'/'); - if (pos == std::u16string::npos) { - entry = child; - return true; - } else { - std::u16string_view newEntryName = entryName.substr(pos + 1); - return child.FindChildByName(newEntryName, entry); - } - } - return false; +Buffer DOCFile::OpenCFStream(uint32 sect, uint32 size, bool useMiniFAT) +{ + BufferView stream; + BufferView fat; + uint32 usedSectorSize; + uint32 offset; + + if (useMiniFAT) { + // use miniFAT + stream = miniStream; + fat = miniFAT; + usedSectorSize = miniSectorSize; + offset = 0; + } else { + // use FAT + stream = vbaProjectBuffer; + fat = FAT; + usedSectorSize = sectorSize; + offset = usedSectorSize; } - private: - BufferView directoryData; - bool initialized = false; - - public: - uint32 entryId{}; - CFDirEntry_Data data{}; - std::vector children; -}; - - -Buffer OpenCFStream(BufferView bv, BufferView fat, uint32 sect, uint16 sectorSize, uint32 size, uint32 offset) -{ Buffer data; - uint16 actualNumberOfSectors = ((size + sectorSize - 1) / sectorSize); + uint16 actualNumberOfSectors = ((size + usedSectorSize - 1) / usedSectorSize); for (uint32 i = 0; i < actualNumberOfSectors; ++i) { if (sect == ENDOFCHAIN) { // end of sector chain break; } - BufferView sectorData(bv.GetData() + offset + sectorSize * sect, sectorSize); - data.Add(sectorData); + data.Add(ByteStream(stream).Seek(offset + usedSectorSize * sect).Read(usedSectorSize)); - if (sect >= fat.GetLength()) { + if (sect * sizeof(uint32) >= fat.GetLength()) { return Buffer(); } sect = *(((uint32*) fat.GetData()) + sect); // get the next sect @@ -582,32 +467,26 @@ Buffer OpenCFStream(BufferView bv, BufferView fat, uint32 sect, uint16 sectorSiz } -void DisplayAllVBAProjectFiles(CFDirEntry& entry, uint32 miniStreamCutoffSize, BufferView bv, BufferView fat, uint32 sectorSize, BufferView miniStream, BufferView miniFat, uint32 miniSectorSize) +void DOCFile::DisplayAllVBAProjectFiles(CFDirEntry& entry) { auto type = entry.data.objectType; char16* name = (char16*) entry.data.nameUnicode; if (type == 0x02) { - Buffer entryBuffer; - - if (entry.data.streamSize < miniStreamCutoffSize) { - entryBuffer = OpenCFStream(miniStream, miniFat, entry.data.startingSectorLocation, miniSectorSize, entry.data.streamSize, 0); - } else { - entryBuffer = OpenCFStream(bv, fat, entry.data.startingSectorLocation, sectorSize, entry.data.streamSize, sectorSize); - } + Buffer entryBuffer = DOCFile::OpenCFStream(entry); GView::App::OpenBuffer(entryBuffer, name, "", GView::App::OpenMethod::BestMatch, "bin"); } for (auto& child : entry.children) { - DisplayAllVBAProjectFiles(child, miniStreamCutoffSize, bv, fat, sectorSize, miniStream, miniFat, miniSectorSize); + DisplayAllVBAProjectFiles(child); } } -bool ParseVBAProject(BufferView bv) +bool DOCFile::ParseVBAProject() { - ByteStream stream((void*) bv.GetData(), bv.GetLength()); + ByteStream stream(vbaProjectBuffer); constexpr uint8 headerSignature[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; for (uint32 i = 0; i < ARRAY_LEN(headerSignature); ++i) { @@ -624,11 +503,11 @@ bool ParseVBAProject(BufferView bv) CHECK(stream.ReadAs() == 0xfffe, false, "byteOrder"); auto sectorShift = stream.ReadAs(); CHECK((majorVersion == 0x03 && sectorShift == 0x09) || (majorVersion == 0x04 && sectorShift == 0x0c), false, "sectorShift"); - uint16 sectorSize = 1 << sectorShift; + sectorSize = 1 << sectorShift; auto miniSectorShift = stream.ReadAs(); CHECK(miniSectorShift == 0x06, false, "miniSectorShift"); - uint16 miniSectorSize = 1 << miniSectorShift; + miniSectorSize = 1 << miniSectorShift; CHECK(stream.ReadAs() == 0x00, false, "reserved"); CHECK(stream.ReadAs() == 0x00, false, "reserved"); @@ -642,7 +521,7 @@ bool ParseVBAProject(BufferView bv) auto firstDirectorySectorLocation = stream.ReadAs(); auto transactionSignatureNumber = stream.ReadAs(); // incremented every time the file is saved - auto miniStreamCutoffSize = stream.ReadAs(); + miniStreamCutoffSize = stream.ReadAs(); CHECK(miniStreamCutoffSize == 0x1000, false, "miniStreamCutoffSize"); auto firstMiniFatSectorLocation = stream.ReadAs(); @@ -650,6 +529,7 @@ bool ParseVBAProject(BufferView bv) auto firstDifatSectorLocation = stream.ReadAs(); auto numberOfDifatSectors = stream.ReadAs(); + // TODO: where to use this? constexpr size_t locationsCount = 109; uint32 DIFAT[locationsCount]; // the first 109 FAT sector locations of the compound file { @@ -666,7 +546,6 @@ bool ParseVBAProject(BufferView bv) } // load FAT - Buffer fat; for (size_t locationIndex = 0; locationIndex < locationsCount; ++locationIndex) { uint32 sect = DIFAT[locationIndex]; if (sect == ENDOFCHAIN || sect == FREESECT) { @@ -676,20 +555,19 @@ bool ParseVBAProject(BufferView bv) // get the sector data size_t byteOffset = sectorSize * (sect + 1); - BufferView sector(bv.GetData() + byteOffset, sectorSize); - fat.Add(sector); + BufferView sector(vbaProjectBuffer.GetData() + byteOffset, sectorSize); + FAT.Add(sector); } - uint16 actualNumberOfSectors = ((bv.GetLength() + sectorSize - 1) / sectorSize) - 1; - if (fat.GetLength() > actualNumberOfSectors * sizeof(uint32)) { - fat.Resize(actualNumberOfSectors * sizeof(uint32)); + uint16 actualNumberOfSectors = ((vbaProjectBuffer.GetLength() + sectorSize - 1) / sectorSize) - 1; + if (FAT.GetLength() > actualNumberOfSectors * sizeof(uint32)) { + FAT.Resize(actualNumberOfSectors * sizeof(uint32)); } // load directory - Buffer directoryData = OpenCFStream(bv, fat, firstDirectorySectorLocation, sectorSize, bv.GetLength(), sectorSize); + Buffer directoryData = OpenCFStream(firstDirectorySectorLocation, vbaProjectBuffer.GetLength(), false); - // parse dir entries - // start with root entry + // parse dir entries, starting with root entry CFDirEntry root(directoryData, 0); root.BuildStorageTree(); @@ -697,32 +575,47 @@ bool ParseVBAProject(BufferView bv) uint32 streamSize = numberOfMiniFatSectors * sectorSize; uint16 actualNumberOfMinisectors = (root.data.streamSize + miniSectorSize - 1) / miniSectorSize; - // load miniFat - Buffer miniFat = OpenCFStream(bv, fat, firstMiniFatSectorLocation, sectorSize, streamSize, sectorSize); // will be interpreted as uint32* - if (miniFat.GetLength() > actualNumberOfMinisectors * sizeof(uint32)) { - miniFat.Resize(actualNumberOfMinisectors * sizeof(uint32)); + // load miniFAT + miniFAT = OpenCFStream(firstMiniFatSectorLocation, streamSize, false); // will be interpreted as uint32* + if (miniFAT.GetLength() > actualNumberOfMinisectors * sizeof(uint32)) { + miniFAT.Resize(actualNumberOfMinisectors * sizeof(uint32)); } // load ministream uint32 miniStreamSize = root.data.streamSize; - Buffer miniStream = OpenCFStream(bv, fat, root.data.startingSectorLocation, sectorSize, miniStreamSize, sectorSize); + miniStream = OpenCFStream(root.data.startingSectorLocation, miniStreamSize, false); // find file - //CFDirEntry found; - //CHECK(root.FindChildByName(u"The VBA Project/_VBA_Project/VBA/ThisTerminal", found), false, ""); - //Buffer foundData = OpenCFStream(miniStream, miniFat, found.data.startingSectorLocation, miniSectorSize, found.data.streamSize, 0); + CFDirEntry dir; + CHECK(root.FindChildByName(u"The VBA Project/_VBA_Project/VBA/dir", dir), false, ""); + Buffer dirData = OpenCFStream(dir); - //ParseModuleStream(foundData); + Buffer decompressedDirData; + DecompressStream(dirData, decompressedDirData); + ParseUncompressedDirStream(decompressedDirData); - DisplayAllVBAProjectFiles(root, miniStreamCutoffSize, bv, fat, sectorSize, miniStream, miniFat, miniSectorSize); + //DisplayAllVBAProjectFiles(root); + + for (auto& moduleRecord : moduleRecords) { + UnicodeStringBuilder streamName(moduleRecord.streamName); + // TODO: make this more generic + std::u16string absoluteStreamName = u"The VBA Project/_VBA_Project/VBA/"; + absoluteStreamName.append(streamName); + + CFDirEntry moduleEntry; + CHECK(root.FindChildByName(absoluteStreamName, moduleEntry), false, ""); + + Buffer moduleData = OpenCFStream(moduleEntry); + ParseModuleStream(moduleData, moduleRecord); + } return true; } bool DOCFile::ProcessData() { - BufferView bv = obj->GetData().GetEntireFile(); + vbaProjectBuffer = obj->GetData().GetEntireFile(); ////// decompress the "dir" stream ////Buffer decompressed; @@ -734,9 +627,7 @@ bool DOCFile::ProcessData() //// parse a module file //ParseModuleStream(bv); - // TODO: parse the compound file binary format - - ParseVBAProject(bv); + ParseVBAProject(); return true; } From 20c58214f2519541793de92ccc4883d66a0b995f Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Mon, 3 Jun 2024 21:44:06 +0300 Subject: [PATCH 08/20] Add naive VBA lexical display --- Types/DOC/src/DOCFile.cpp | 6 +-- Types/VBA/src/VBAFile.cpp | 80 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 80 insertions(+), 6 deletions(-) diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 2df78aef..61e31751 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -404,10 +404,8 @@ bool DOCFile::ParseModuleStream(BufferView bv, MODULE_Record moduleRecord) DecompressStream(compressed, decompressed); - // TODO: de vazut tf nu merge VBA plugin - //GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::ForceType, "VBA"); - - GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::BestMatch, "bin"); + GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::ForceType, "VBA"); + //GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::BestMatch, "bin"); return true; } diff --git a/Types/VBA/src/VBAFile.cpp b/Types/VBA/src/VBAFile.cpp index 92317d1c..d511906e 100644 --- a/Types/VBA/src/VBAFile.cpp +++ b/Types/VBA/src/VBAFile.cpp @@ -18,10 +18,86 @@ void VBAFile::GetTokenIDStringRepresentation(uint32 id, AppCUI::Utils::String& s CHECKRET(str.SetFormat("Unknown: 0x%08X", id), ""); } + +uint32 ParseUntilSpace(GView::View::LexicalViewer::TextParser text, uint32 index) +{ + return text.Parse(index, [](char16 c) { return !isspace(c); }); +} + void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) { - syntax.tokens.Add(1, 0, 5, TokenColor::Keyword); - syntax.tokens.Add(1, 5, 10, TokenColor::String, TokenAlignament::StartsOnNewLine); + uint32 start = 0; + uint32 end = 0; + + TokenAlignament presetAlignament = TokenAlignament::None; + + + while (start < syntax.text.Len()) { + + auto c = syntax.text[start]; + + if (c == ' ') { + end = syntax.text.ParseSpace(end, SpaceType::Space); + if ((uint32) presetAlignament & (uint32) TokenAlignament::StartsOnNewLine) { + syntax.tokens.Add(1, start, end, TokenColor::Word, presetAlignament); + presetAlignament = TokenAlignament::None; + } + start = end; + continue; + } + + if (isalpha(c)) { + end = syntax.text.Parse(start, [](char16 c) { return (bool) isalnum(c) || c == '_'; }); + syntax.tokens.Add(1, start, end, TokenColor::Word, presetAlignament | TokenAlignament::AddSpaceAfter); + start = syntax.text.ParseSpace(end, SpaceType::Space); + presetAlignament = TokenAlignament::None; // TODO: i hate this + continue; + } + + if (isdigit(c)) { + end = syntax.text.Parse(start, [](char16 c) { return (bool) isdigit(c); }); + syntax.tokens.Add(1, start, end, TokenColor::Number, presetAlignament | TokenAlignament::AddSpaceAfter); + start = end; + continue; + } + + // TODO: check for a range of operators + // TODO: fix spacing for certain operators + if (c == '=' || c == '(' || c == ')' || c == ',' || c == '.' || c == '_' || c == '&') { + end = start + 1; + syntax.tokens.Add(1, start, end, TokenColor::Operator, presetAlignament | TokenAlignament::AddSpaceAfter); + start = syntax.text.ParseSpace(end, SpaceType::Space); + presetAlignament = TokenAlignament::None; // TODO: i hate this + continue; + } + + // TODO: account for all types of strings if they are permitted in the language + if (c == '"') { + end = syntax.text.ParseString(start); + syntax.tokens.Add(1, start, end, TokenColor::String, presetAlignament | TokenAlignament::AddSpaceAfter); + start = syntax.text.ParseSpace(end, SpaceType::Space); + continue; + } + + if (c == '\r' || c == '\n') { + end = syntax.text.ParseUntillStartOfNextLine(start); + presetAlignament = TokenAlignament::StartsOnNewLine; + start = end; + continue; + } + + if (c == '\'') { + end = syntax.text.ParseUntillEndOfLine(start); + syntax.tokens.Add(1, start, end, TokenColor::Comment, presetAlignament | TokenAlignament::NewLineAfter); + start = syntax.text.ParseUntillStartOfNextLine(end); + continue; + } + + break; + } + + //syntax.tokens.Add(1, 0, 5, TokenColor::Keyword); + //syntax.tokens.Add(1, 5, 10, TokenColor::String, TokenAlignament::StartsOnNewLine); } bool VBAFile::StringToContent(std::u16string_view string, AppCUI::Utils::UnicodeStringBuilder& result) From add20d40afca4ea58b902606f1f7e5e8d879c517 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Tue, 4 Jun 2024 18:31:31 +0300 Subject: [PATCH 09/20] Finish VBA dummy text parsing and find generic modules path within vbaProject.bin file --- Types/DOC/include/doc.hpp | 1 + Types/DOC/src/DOCFile.cpp | 54 ++++++++++++++++++++-------- Types/DOC/src/doc.cpp | 3 ++ Types/VBA/src/VBAFile.cpp | 74 +++++++++++++++++++++++++++------------ 4 files changed, 95 insertions(+), 37 deletions(-) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index 9b4fc09d..8f7011a7 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -206,6 +206,7 @@ namespace Type bool DecompressStream(BufferView bv, Buffer& decompressed); bool ParseUncompressedDirStream(BufferView bv); bool ParseModuleStream(BufferView bv, MODULE_Record moduleRecord); + bool FindModulesPath(const CFDirEntry& entry, UnicodeStringBuilder& path); }; namespace Panels diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 61e31751..d835777a 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -405,7 +405,13 @@ bool DOCFile::ParseModuleStream(BufferView bv, MODULE_Record moduleRecord) DecompressStream(compressed, decompressed); GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::ForceType, "VBA"); - //GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::BestMatch, "bin"); + + { // TODO: remove + std::string filepath = "D:\\temp\\modules\\"; + filepath.append(moduleRecord.streamName); + std::ofstream out(filepath, std::ios::binary | std::ios::trunc); + out.write((const char*) decompressed.GetData(), decompressed.GetLength()); + } return true; } @@ -482,6 +488,28 @@ void DOCFile::DisplayAllVBAProjectFiles(CFDirEntry& entry) } +bool DOCFile::FindModulesPath(const CFDirEntry& entry, UnicodeStringBuilder& path) +{ + std::u16string_view name((char16*) entry.data.nameUnicode, entry.data.nameLength / 2 - 1); // take into account the null character + + if (!entry.children.size()) { + return name == u"dir"; + } + + for (const CFDirEntry& child : entry.children) { + UnicodeStringBuilder pathPart; + if (FindModulesPath(child, pathPart)) { + path.Add(name); + path.Add("/"); + path.Add(pathPart); + return true; + } + } + + return false; +} + + bool DOCFile::ParseVBAProject() { ByteStream stream(vbaProjectBuffer); @@ -585,8 +613,13 @@ bool DOCFile::ParseVBAProject() // find file + UnicodeStringBuilder modulesPathUsb; + // TODO: no no no [0] + CHECK(FindModulesPath(root.children[0], modulesPathUsb), false, "modulesPath"); + std::u16string modulesPath = modulesPathUsb; + CFDirEntry dir; - CHECK(root.FindChildByName(u"The VBA Project/_VBA_Project/VBA/dir", dir), false, ""); + CHECK(root.FindChildByName(modulesPath + u"dir", dir), false, ""); Buffer dirData = OpenCFStream(dir); Buffer decompressedDirData; @@ -595,10 +628,13 @@ bool DOCFile::ParseVBAProject() //DisplayAllVBAProjectFiles(root); + // TODO: remove + std::filesystem::remove_all("D:\\temp\\modules\\"); + std::filesystem::create_directory("D:\\temp\\modules\\"); + for (auto& moduleRecord : moduleRecords) { UnicodeStringBuilder streamName(moduleRecord.streamName); - // TODO: make this more generic - std::u16string absoluteStreamName = u"The VBA Project/_VBA_Project/VBA/"; + std::u16string absoluteStreamName = modulesPath; absoluteStreamName.append(streamName); CFDirEntry moduleEntry; @@ -615,16 +651,6 @@ bool DOCFile::ProcessData() { vbaProjectBuffer = obj->GetData().GetEntireFile(); - ////// decompress the "dir" stream - ////Buffer decompressed; - ////DecompressStream(bv, decompressed); - - ////// parse the decompressed dir stream - ////ParseUncompressedDirStream(decompressed); - - //// parse a module file - //ParseModuleStream(bv); - ParseVBAProject(); return true; diff --git a/Types/DOC/src/doc.cpp b/Types/DOC/src/doc.cpp index 11e44c38..0e2d7693 100644 --- a/Types/DOC/src/doc.cpp +++ b/Types/DOC/src/doc.cpp @@ -69,6 +69,9 @@ PLUGIN_EXPORT bool PopulateWindow(Reference win) } PLUGIN_EXPORT void UpdateSettings(IniSection sect) { + sect["Pattern"] = "magic:D0 CF 11 E0 A1 B1 1A E1"; + + // TODO: not quite right sect["Extension"] = { "doc" }; sect["Priority"] = 1; sect["Description"] = "Document (*.doc)"; diff --git a/Types/VBA/src/VBAFile.cpp b/Types/VBA/src/VBAFile.cpp index d511906e..684623a3 100644 --- a/Types/VBA/src/VBAFile.cpp +++ b/Types/VBA/src/VBAFile.cpp @@ -19,11 +19,20 @@ void VBAFile::GetTokenIDStringRepresentation(uint32 id, AppCUI::Utils::String& s } -uint32 ParseUntilSpace(GView::View::LexicalViewer::TextParser text, uint32 index) +uint32 ParseString(GView::View::LexicalViewer::TextParser text, uint32 index) { - return text.Parse(index, [](char16 c) { return !isspace(c); }); + uint32 end = text.Parse(index + 1, [](char16 c) { return c != '"'; }); + return end + 1; } +UnicodeStringBuilder KEYWORDS[] = { UnicodeStringBuilder("Attribute"), UnicodeStringBuilder("Sub"), UnicodeStringBuilder("Private"), UnicodeStringBuilder("As"), UnicodeStringBuilder("Dim"), UnicodeStringBuilder("End"), + UnicodeStringBuilder("ByVal"), UnicodeStringBuilder("Set"), UnicodeStringBuilder("While"), + UnicodeStringBuilder("Wend"), UnicodeStringBuilder("If"), UnicodeStringBuilder("Then") }; + +UnicodeStringBuilder KEYWORDS2[] = { UnicodeStringBuilder("True"), UnicodeStringBuilder("False") }; + +const char operators[] = "=(),._&$+-*/<>#"; + void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) { uint32 start = 0; @@ -46,36 +55,58 @@ void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) continue; } + bool parseSpace = false; if (isalpha(c)) { end = syntax.text.Parse(start, [](char16 c) { return (bool) isalnum(c) || c == '_'; }); - syntax.tokens.Add(1, start, end, TokenColor::Word, presetAlignament | TokenAlignament::AddSpaceAfter); - start = syntax.text.ParseSpace(end, SpaceType::Space); - presetAlignament = TokenAlignament::None; // TODO: i hate this - continue; + + TokenColor color = TokenColor::Word; + for (auto keyword : KEYWORDS) { + if (syntax.text.GetSubString(start, end) == keyword) { + color = TokenColor::Keyword; + break; + } + } + + for (auto keyword : KEYWORDS2) { + if (syntax.text.GetSubString(start, end) == keyword) { + color = TokenColor::Keyword2; + break; + } + } + + syntax.tokens.Add(1, start, end, color, presetAlignament); + parseSpace = true; } if (isdigit(c)) { end = syntax.text.Parse(start, [](char16 c) { return (bool) isdigit(c); }); - syntax.tokens.Add(1, start, end, TokenColor::Number, presetAlignament | TokenAlignament::AddSpaceAfter); - start = end; - continue; + syntax.tokens.Add(1, start, end, TokenColor::Number, presetAlignament); + parseSpace = true; } - // TODO: check for a range of operators - // TODO: fix spacing for certain operators - if (c == '=' || c == '(' || c == ')' || c == ',' || c == '.' || c == '_' || c == '&') { - end = start + 1; - syntax.tokens.Add(1, start, end, TokenColor::Operator, presetAlignament | TokenAlignament::AddSpaceAfter); - start = syntax.text.ParseSpace(end, SpaceType::Space); - presetAlignament = TokenAlignament::None; // TODO: i hate this - continue; + for (char op : operators) { + if (c == op) { + end = start + 1; + syntax.tokens.Add(1, start, end, TokenColor::Operator, presetAlignament); + parseSpace = true; + break; + } } - // TODO: account for all types of strings if they are permitted in the language if (c == '"') { - end = syntax.text.ParseString(start); - syntax.tokens.Add(1, start, end, TokenColor::String, presetAlignament | TokenAlignament::AddSpaceAfter); + end = ParseString(syntax.text, start); + syntax.tokens.Add(1, start, end, TokenColor::String, presetAlignament); + parseSpace = true; + } + + if (parseSpace) { start = syntax.text.ParseSpace(end, SpaceType::Space); + + if (start > end) { + presetAlignament = TokenAlignament::AddSpaceBefore; + } else { + presetAlignament = TokenAlignament::None; + } continue; } @@ -95,9 +126,6 @@ void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) break; } - - //syntax.tokens.Add(1, 0, 5, TokenColor::Keyword); - //syntax.tokens.Add(1, 5, 10, TokenColor::String, TokenAlignament::StartsOnNewLine); } bool VBAFile::StringToContent(std::u16string_view string, AppCUI::Utils::UnicodeStringBuilder& result) From 3e1568082738ecb4692a965c65489c94b31bf1a2 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Wed, 5 Jun 2024 11:30:06 +0300 Subject: [PATCH 10/20] Add view for plugin --- Types/DOC/include/doc.hpp | 10 ++-- Types/DOC/src/CFDirEntry.cpp | 25 ++++----- Types/DOC/src/DOCFile.cpp | 102 +++++++++++++++++------------------ Types/DOC/src/doc.cpp | 16 +++--- 4 files changed, 78 insertions(+), 75 deletions(-) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index 8f7011a7..d83dc2bc 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -50,9 +50,6 @@ namespace Type } }; - - // TODO: move to another file - #pragma pack(1) struct CFDirEntry_Data { uint8 nameUnicode[64]; // the structure starts from here @@ -145,6 +142,9 @@ namespace Type uint16 miniSectorSize{}; uint16 miniStreamCutoffSize{}; + std::u16string modulesPath; + CFDirEntry root; + // VBA streams helper member variables std::vector referenceControlRecords; std::vector referenceOriginalRecords; @@ -152,6 +152,7 @@ namespace Type std::vector referenceProjectRecords; std::vector moduleRecords; + uint32 moduleRecordIndex = 0; public: DOCFile(); @@ -199,13 +200,12 @@ namespace Type bool ParseVBAProject(); Buffer OpenCFStream(const CFDirEntry& entry); Buffer OpenCFStream(uint32 sect, uint32 size, bool useMiniFAT); - void DisplayAllVBAProjectFiles(CFDirEntry& entry); // VBA streams helper methods bool DecompressStream(BufferView bv, Buffer& decompressed); bool ParseUncompressedDirStream(BufferView bv); - bool ParseModuleStream(BufferView bv, MODULE_Record moduleRecord); + bool ParseModuleStream(BufferView bv, const MODULE_Record& moduleRecord, Buffer& text); bool FindModulesPath(const CFDirEntry& entry, UnicodeStringBuilder& path); }; diff --git a/Types/DOC/src/CFDirEntry.cpp b/Types/DOC/src/CFDirEntry.cpp index 80b30f67..cbdcdd69 100644 --- a/Types/DOC/src/CFDirEntry.cpp +++ b/Types/DOC/src/CFDirEntry.cpp @@ -59,22 +59,23 @@ void CFDirEntry::BuildStorageTree() AppendChildren(data.childId); } - bool CFDirEntry::FindChildByName(std::u16string_view entryName, CFDirEntry& entry) { - for (CFDirEntry& child : children) { - std::u16string_view childName((char16_t*) child.data.nameUnicode, child.data.nameLength / 2 - 1); - if (!entryName.starts_with(childName)) { - continue; - } + std::u16string_view currentEntryName((char16_t*) this->data.nameUnicode, this->data.nameLength / 2 - 1); + if (!entryName.starts_with(currentEntryName)) { + return false; + } - auto pos = entryName.find_first_of(u'/'); - if (pos == std::u16string::npos) { - entry = child; + auto pos = entryName.find_first_of(u'/'); + if (pos == std::u16string::npos) { + entry = *this; + return true; + } + + for (CFDirEntry& child : children) { + std::u16string_view newEntryName = entryName.substr(pos + 1); + if (child.FindChildByName(newEntryName, entry)) { return true; - } else { - std::u16string_view newEntryName = entryName.substr(pos + 1); - return child.FindChildByName(newEntryName, entry); } } return false; diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index d835777a..e3bec290 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -1,7 +1,5 @@ #include "doc.hpp" -#include // TODO: remove - namespace GView::Type::DOC { using namespace GView::View::LexicalViewer; @@ -37,7 +35,6 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) CHECK((header & 0x7000) >> 12 == 0b011, false, ""); // fixed value if (!isCompressed) { - // TODO: verify CHECK(index + 4096 < bv.GetLength(), false, ""); decompressed.Add(BufferView(bv.GetData() + index, 4096)); index += 4096; @@ -314,11 +311,11 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x02, false, "projectCookie_size"); stream.Seek(sizeof(uint16)); // ignored Cookie + moduleRecords.reserve(modulesCount); + // array of MODULE records for (uint32 moduleIndex = 0; moduleIndex < modulesCount; ++moduleIndex) { // TODO: check this - MUST have a corresponding specified in PROJECT Stream - - // TODO: preallocate them based on modulesCount MODULE_Record& moduleRecord = moduleRecords.emplace_back(); CHECK(stream.ReadAs() == 0x19, false, "moduleName_id"); @@ -390,28 +387,13 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) return true; } -bool DOCFile::ParseModuleStream(BufferView bv, MODULE_Record moduleRecord) +bool DOCFile::ParseModuleStream(BufferView bv, const MODULE_Record& moduleRecord, Buffer& text) { size_t moduleTextOffset = moduleRecord.textOffset; - ByteStream stream(bv); - stream.Seek(moduleTextOffset); - auto compressed = stream.Read(stream.GetSize() - stream.GetCursor()); - - Buffer decompressed; - - DecompressStream(compressed, decompressed); - - GView::App::OpenBuffer(decompressed, moduleRecord.streamName, "", GView::App::OpenMethod::ForceType, "VBA"); - - { // TODO: remove - std::string filepath = "D:\\temp\\modules\\"; - filepath.append(moduleRecord.streamName); - std::ofstream out(filepath, std::ios::binary | std::ios::trunc); - out.write((const char*) decompressed.GetData(), decompressed.GetLength()); - } + DecompressStream(compressed, text); return true; } @@ -419,6 +401,8 @@ bool DOCFile::ParseModuleStream(BufferView bv, MODULE_Record moduleRecord) Buffer DOCFile::OpenCFStream(const CFDirEntry& entry) { + CHECK(entry.data.objectType == 0x02, Buffer(), "incorrect entry"); + auto sect = entry.data.startingSectorLocation; auto size = entry.data.streamSize; bool useMiniFAT = size < miniStreamCutoffSize; @@ -594,8 +578,7 @@ bool DOCFile::ParseVBAProject() Buffer directoryData = OpenCFStream(firstDirectorySectorLocation, vbaProjectBuffer.GetLength(), false); // parse dir entries, starting with root entry - - CFDirEntry root(directoryData, 0); + root = CFDirEntry(directoryData, 0); root.BuildStorageTree(); uint32 streamSize = numberOfMiniFatSectors * sectorSize; @@ -612,61 +595,78 @@ bool DOCFile::ParseVBAProject() miniStream = OpenCFStream(root.data.startingSectorLocation, miniStreamSize, false); // find file - UnicodeStringBuilder modulesPathUsb; - // TODO: no no no [0] - CHECK(FindModulesPath(root.children[0], modulesPathUsb), false, "modulesPath"); - std::u16string modulesPath = modulesPathUsb; + CHECK(FindModulesPath(root, modulesPathUsb), false, "modulesPath"); + modulesPath = modulesPathUsb; CFDirEntry dir; CHECK(root.FindChildByName(modulesPath + u"dir", dir), false, ""); Buffer dirData = OpenCFStream(dir); Buffer decompressedDirData; - DecompressStream(dirData, decompressedDirData); - ParseUncompressedDirStream(decompressedDirData); - - //DisplayAllVBAProjectFiles(root); - - // TODO: remove - std::filesystem::remove_all("D:\\temp\\modules\\"); - std::filesystem::create_directory("D:\\temp\\modules\\"); - - for (auto& moduleRecord : moduleRecords) { - UnicodeStringBuilder streamName(moduleRecord.streamName); - std::u16string absoluteStreamName = modulesPath; - absoluteStreamName.append(streamName); - - CFDirEntry moduleEntry; - CHECK(root.FindChildByName(absoluteStreamName, moduleEntry), false, ""); - - Buffer moduleData = OpenCFStream(moduleEntry); - ParseModuleStream(moduleData, moduleRecord); - } + CHECK(DecompressStream(dirData, decompressedDirData), false, "decompress dir stream"); + CHECK(ParseUncompressedDirStream(decompressedDirData), false, "parse dir stream"); return true; } bool DOCFile::ProcessData() { - vbaProjectBuffer = obj->GetData().GetEntireFile(); + BufferView bv = obj->GetData().GetEntireFile(); - ParseVBAProject(); + if (bv[0] == 0x50 && bv[1] == 0x4b) { + // zip archive - get the vbaProject.bin file if any + + } + + vbaProjectBuffer = bv; + + CHECK(ParseVBAProject(), false, ""); return true; } bool DOCFile::BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) { + moduleRecordIndex = 0; return true; } bool DOCFile::PopulateItem(AppCUI::Controls::TreeViewItem item) { - return false; + MODULE_Record& moduleRecord = moduleRecords[moduleRecordIndex]; + + item.SetText(0, moduleRecord.moduleName); + item.SetText(1, moduleRecord.streamName); + + std::u16string absoluteStreamName = modulesPath; + absoluteStreamName.append(UnicodeStringBuilder(moduleRecord.streamName)); + CFDirEntry moduleEntry; + CHECK(root.FindChildByName(absoluteStreamName, moduleEntry), false, ""); + Buffer moduleBuffer = OpenCFStream(moduleEntry); + Buffer decompressed; + ParseModuleStream(moduleBuffer, moduleRecord, decompressed); + + item.SetText(2, String().Format("%u", decompressed.GetLength())); + + item.SetData(&moduleRecord); + + moduleRecordIndex++; + return moduleRecordIndex < moduleRecords.size(); } void DOCFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) { + auto moduleRecord = item.GetData(); + + std::u16string absoluteStreamName = modulesPath; + absoluteStreamName.append(UnicodeStringBuilder(moduleRecord->streamName)); + CFDirEntry moduleEntry; + CHECKRET(root.FindChildByName(absoluteStreamName, moduleEntry), ""); + Buffer moduleBuffer = OpenCFStream(moduleEntry); + + Buffer decompressed; + ParseModuleStream(moduleBuffer, moduleRecord, decompressed); + GView::App::OpenBuffer(decompressed, moduleRecord->streamName, "", GView::App::OpenMethod::ForceType, "VBA"); } } // namespace GView::Type::DOC diff --git a/Types/DOC/src/doc.cpp b/Types/DOC/src/doc.cpp index 0e2d7693..df7536c9 100644 --- a/Types/DOC/src/doc.cpp +++ b/Types/DOC/src/doc.cpp @@ -36,7 +36,9 @@ void CreateContainerView(Reference win, Reference< settings.SetIcon(DOC_ICON); settings.SetColumns({ - "n:&Index,a:r,w:50", + "n:&Module name,a:l,w:30", + "n:&Stream name,a:c,w:40", + "n:&Size,a:c,w:15", }); settings.SetEnumerateCallback(win->GetObject()->GetContentType().ToObjectRef()); @@ -59,8 +61,10 @@ PLUGIN_EXPORT bool PopulateWindow(Reference win) { auto doc = win->GetObject()->GetContentType(); - // TODO: check return value - doc->ProcessData(); + if (!doc->ProcessData()) { + AppCUI::Dialogs::MessageBox::ShowError("Error", "Incorrect format!"); + return false; + } CreateContainerView(win, doc); win->AddPanel(Pointer(new DOC::Panels::Information(doc)), true); @@ -70,11 +74,9 @@ PLUGIN_EXPORT bool PopulateWindow(Reference win) PLUGIN_EXPORT void UpdateSettings(IniSection sect) { sect["Pattern"] = "magic:D0 CF 11 E0 A1 B1 1A E1"; - - // TODO: not quite right - sect["Extension"] = { "doc" }; + sect["Extension"] = { "docx", "docm", "xslx", "xslm", "pptx", "pptm" }; sect["Priority"] = 1; - sect["Description"] = "Document (*.doc)"; + sect["Description"] = "Office file (*.docx, *.xslx, *.pptx) / vbaProject.bin compound file"; } } From 8059285ca970e987df5556b4e8468d3c3412c16e Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Wed, 5 Jun 2024 18:10:16 +0300 Subject: [PATCH 11/20] Add info about vba and fix parsing bug for certain files --- GViewCore/src/ZIP/zip.cpp | 12 ++-- Types/DOC/include/doc.hpp | 35 ++++++++++- Types/DOC/src/DOCFile.cpp | 92 ++++++++++++++-------------- Types/DOC/src/PanelInformation.cpp | 96 +++++++++++++++++++++++++++++- Types/DOC/src/doc.cpp | 10 +++- 5 files changed, 188 insertions(+), 57 deletions(-) diff --git a/GViewCore/src/ZIP/zip.cpp b/GViewCore/src/ZIP/zip.cpp index b1271b3f..95fb42e3 100644 --- a/GViewCore/src/ZIP/zip.cpp +++ b/GViewCore/src/ZIP/zip.cpp @@ -375,13 +375,14 @@ bool GetInfo(std::u16string_view path, Info& info) CHECK(mz_zip_reader_open_file(internalInfo->reader.value, internalInfo->path.c_str()) == MZ_OK, false, ""); CHECK(mz_zip_reader_goto_first_entry(internalInfo->reader.value) == MZ_OK, false, ""); - do - { + do { mz_zip_file* zipFile{ nullptr }; CHECKBK(mz_zip_reader_entry_get_info(internalInfo->reader.value, &zipFile) == MZ_OK, ""); mz_zip_reader_set_pattern(internalInfo->reader.value, nullptr, 1); // do we need a pattern? - auto& entry = internalInfo->entries.emplace_back(); + size_t entryIndex = internalInfo->entries.size(); + auto& entry = internalInfo->entries.emplace_back(); + ConvertZipFileInfoToEntry(zipFile, entry); std::u8string_view filename = entry.filename; @@ -390,17 +391,18 @@ bool GetInfo(std::u16string_view path, Info& info) } size_t offset = 0; - + while (true) { size_t pos = filename.find_first_of('/', offset); CHECKBK(pos != std::string::npos, ""); // add the parent as well if not already present + auto& entry = internalInfo->entries[entryIndex]; auto parentFilename = entry.filename.substr(0, pos + 1); auto it = std::find_if( - internalInfo->entries.begin(), internalInfo->entries.end(), [&](const _Entry& e) -> bool { return e.filename == parentFilename; }); + internalInfo->entries.begin(), internalInfo->entries.end(), [&](const _Entry& e) -> bool { return e.filename == parentFilename; }); if (it == internalInfo->entries.end()) { auto& parentEntry = internalInfo->entries.emplace_back(); parentEntry.filename = parentFilename; diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index d83dc2bc..fc8e2353 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -119,6 +119,7 @@ namespace Type uint16 minorVersion; }; + // TODO: add docstring to items in view struct MODULE_Record { String moduleName; String streamName; @@ -127,21 +128,49 @@ namespace Type uint32 helpContext; }; + enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; + + class DOCFile : public TypeInterface, public View::ContainerViewer::EnumerateInterface, public View::ContainerViewer::OpenItemInterface { private: + constexpr static uint8 CF_HEADER_SIGNATURE[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; + constexpr static size_t DIFAT_LOCATIONS_COUNT = 109; + friend class Panels::Information; + // displayed info about the file + uint16 cfMinorVersion; + uint16 cfMajorVersion; + uint32 transactionSignatureNumber; + uint32 numberOfFatSectors; + uint32 numberOfMiniFatSectors; + uint32 numberOfDifatSectors; + uint32 firstDirectorySectorLocation; + uint32 firstMiniFatSectorLocation; + uint32 firstDifatSectorLocation; + + uint32 dirMajorVersion; + uint16 dirMinorVersion; + SysKind sysKind; + String projectName; + String docString; + String helpFile; + String constants; + uint16 modulesCount; + // compound files (vbaProject.bin) helper member variables AppCUI::Utils::Buffer vbaProjectBuffer; AppCUI::Utils::Buffer FAT; AppCUI::Utils::Buffer miniStream; AppCUI::Utils::Buffer miniFAT; + public: uint16 sectorSize{}; uint16 miniSectorSize{}; uint16 miniStreamCutoffSize{}; + private: std::u16string modulesPath; CFDirEntry root; @@ -215,7 +244,11 @@ namespace Type { Reference doc; Reference general; - Reference headers; + Reference compoundFileInfo; + Reference vbaStreamsInfo; + + inline static const auto dec = NumericFormat{ NumericFormatFlags::None, 10, 3, ',' }; + inline static const auto hex = NumericFormat{ NumericFormatFlags::HexPrefix, 16 }; void UpdateGeneralInformation(); void UpdateIssues(); diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index e3bec290..89742a1b 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -26,13 +26,16 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) while (index < bv.GetLength()) { // loop over chunks + size_t chunkStartIndex = index; + uint16 header = bv[index] + (bv[index + 1] << 8); index += 2; uint16 chunkLength = header & 0x0fff; // + 3, for total size bool isCompressed = header & 0x8000; // most significant bit - CHECK((header & 0x7000) >> 12 == 0b011, false, ""); // fixed value + uint8 headerSignature = (header & 0x7000) >> 12; + CHECK(headerSignature == 0b011, false, ""); // fixed value if (!isCompressed) { CHECK(index + 4096 < bv.GetLength(), false, ""); @@ -42,10 +45,11 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) } // Token Sequence series - while (index < chunkLength + 3) { + size_t end = chunkStartIndex + chunkLength + 3; + while (index < end) { unsigned char flags = bv[index++]; for (int i = 0; i < 8; ++i) { - if (index > chunkLength + 3) { + if (index >= end) { break; } @@ -90,9 +94,6 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) return true; } -enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; - - bool DOCFile::ParseUncompressedDirStream(BufferView bv) { ByteStream stream((void*) bv.GetData(), bv.GetLength()); @@ -102,9 +103,17 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x01, false, "projectsyskind_id"); CHECK(stream.ReadAs() == 0x04, false, "projectsyskind_size"); - SysKind sysKind = (SysKind) stream.ReadAs(); + sysKind = (SysKind) stream.ReadAs(); - CHECK(stream.ReadAs() == 0x02, false, "projectlcid_id"); + check = stream.ReadAs(); + if (check == 0x4a) { + // PROJECTCOMPATVERSION + CHECK(stream.ReadAs() == 0x04, false, "projectcompat_size"); + stream.Seek(sizeof(uint32)); // compatVersion skipped for now + check = stream.ReadAs(); + } + + CHECK(check == 0x02, false, "projectlcid_id"); CHECK(stream.ReadAs() == 0x04, false, "projectlcid_size"); CHECK(stream.ReadAs() == 0x0409, false, "projectlcid_lcid"); @@ -119,12 +128,12 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x04, false, "projectname_id"); auto projectName_size = stream.ReadAs(); CHECK(projectName_size >= 1 && projectName_size <= 128, false, "projectname_size"); - String projectName(stream.Read(projectName_size)); + projectName = String(stream.Read(projectName_size)); CHECK(stream.ReadAs() == 0x05, false, "projectdocstring_id"); auto projectDocString_size = stream.ReadAs(); CHECK(projectDocString_size <= 2000, false, "projectdocstring_size"); - String docstring(stream.Read(projectDocString_size)); // TODO: decode + docString = String(stream.Read(projectDocString_size)); // TODO: decode CHECK(stream.ReadAs() == 0x40, false, "reserved"); auto projectDocStringUnicode_size = stream.ReadAs(); @@ -143,6 +152,8 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(helpFile1[i] == helpFile2[i], false, "helpFiles"); } + helpFile = helpFile1; + CHECK(stream.ReadAs() == 0x07, false, "projectHelpContext_id"); CHECK(stream.ReadAs() == 0x04, false, "projectHelpContext_size"); auto projectHelpContext = stream.ReadAs(); @@ -153,14 +164,14 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x09, false, "projectVersoin_id"); CHECK(stream.ReadAs() == 0x04, false, "reserved"); - auto versionMajor = stream.ReadAs(); - auto versionMinor = stream.ReadAs(); + dirMajorVersion = stream.ReadAs(); + dirMinorVersion = stream.ReadAs(); CHECK(stream.ReadAs() == 0x0c, false, "projectConstants_id"); auto projectConstants_size = stream.ReadAs(); CHECK(projectConstants_size <= 1015, false, "projectConstants_size"); - String constants(stream.Read(projectConstants_size)); // TODO: decode and ABNF + constants = String(stream.Read(projectConstants_size)); // TODO: decode and ABNF CHECK(stream.ReadAs() == 0x3c, false, "reserved"); auto projectConstantsUnicode_size = stream.ReadAs(); @@ -306,7 +317,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) // PROJECTMODULES CHECK(stream.ReadAs() == 0x02, false, "size"); - auto modulesCount = stream.ReadAs(); + modulesCount = stream.ReadAs(); CHECK(stream.ReadAs() == 0x13, false, "projectCookie_id"); CHECK(stream.ReadAs() == 0x02, false, "projectCookie_size"); stream.Seek(sizeof(uint16)); // ignored Cookie @@ -498,21 +509,20 @@ bool DOCFile::ParseVBAProject() { ByteStream stream(vbaProjectBuffer); - constexpr uint8 headerSignature[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; - for (uint32 i = 0; i < ARRAY_LEN(headerSignature); ++i) { - CHECK(stream.ReadAs() == headerSignature[i], false, "headerSignature"); + for (uint32 i = 0; i < ARRAY_LEN(CF_HEADER_SIGNATURE); ++i) { + CHECK(stream.ReadAs() == CF_HEADER_SIGNATURE[i], false, "headerSignature"); } CHECK(stream.ReadAs() == 0, false, "headerCLSID"); CHECK(stream.ReadAs() == 0, false, "headerCLSID"); - auto minorVersion = stream.ReadAs(); // TODO: This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004. - auto majorVersion = stream.ReadAs(); - CHECK(majorVersion == 0x03 || majorVersion == 0x04, false, "majorVersion"); + cfMinorVersion = stream.ReadAs(); // TODO: This field SHOULD be set to 0x003E if the major version field is either 0x0003 or 0x0004. + cfMajorVersion = stream.ReadAs(); + CHECK(cfMajorVersion == 0x03 || cfMajorVersion == 0x04, false, "majorVersion"); CHECK(stream.ReadAs() == 0xfffe, false, "byteOrder"); auto sectorShift = stream.ReadAs(); - CHECK((majorVersion == 0x03 && sectorShift == 0x09) || (majorVersion == 0x04 && sectorShift == 0x0c), false, "sectorShift"); + CHECK((cfMajorVersion == 0x03 && sectorShift == 0x09) || (cfMajorVersion == 0x04 && sectorShift == 0x0c), false, "sectorShift"); sectorSize = 1 << sectorShift; auto miniSectorShift = stream.ReadAs(); @@ -523,40 +533,37 @@ bool DOCFile::ParseVBAProject() CHECK(stream.ReadAs() == 0x00, false, "reserved"); auto numberOfDirectorySectors = stream.ReadAs(); - if (majorVersion == 0x03) { + if (cfMajorVersion == 0x03) { CHECK(numberOfDirectorySectors == 0x00, false, "numberOfDirectorySectors"); } - auto numberOfFatSectors = stream.ReadAs(); - auto firstDirectorySectorLocation = stream.ReadAs(); - auto transactionSignatureNumber = stream.ReadAs(); // incremented every time the file is saved + numberOfFatSectors = stream.ReadAs(); + firstDirectorySectorLocation = stream.ReadAs(); + transactionSignatureNumber = stream.ReadAs(); // incremented every time the file is saved miniStreamCutoffSize = stream.ReadAs(); CHECK(miniStreamCutoffSize == 0x1000, false, "miniStreamCutoffSize"); - auto firstMiniFatSectorLocation = stream.ReadAs(); - auto numberOfMiniFatSectors = stream.ReadAs(); - auto firstDifatSectorLocation = stream.ReadAs(); - auto numberOfDifatSectors = stream.ReadAs(); + firstMiniFatSectorLocation = stream.ReadAs(); + numberOfMiniFatSectors = stream.ReadAs(); + firstDifatSectorLocation = stream.ReadAs(); + numberOfDifatSectors = stream.ReadAs(); - // TODO: where to use this? - constexpr size_t locationsCount = 109; - uint32 DIFAT[locationsCount]; // the first 109 FAT sector locations of the compound file + uint32 DIFAT[DIFAT_LOCATIONS_COUNT]; // the first DIFAT sector locations of the compound file { - auto difatBv = stream.Read(locationsCount * sizeof(*DIFAT)); + auto difatBv = stream.Read(DIFAT_LOCATIONS_COUNT * sizeof(*DIFAT)); memcpy(DIFAT, (void*) difatBv.GetData(), difatBv.GetLength()); } - if (majorVersion == 0x04) { + if (cfMajorVersion == 0x04) { // check if the next 3584 bytes are 0 - uint32 zeroCheckIndex = 3584; - while (zeroCheckIndex--) { + while (stream.GetCursor() < sectorSize) { CHECK(stream.ReadAs() == 0x00, false, "zeroCheck"); } } // load FAT - for (size_t locationIndex = 0; locationIndex < locationsCount; ++locationIndex) { + for (size_t locationIndex = 0; locationIndex < DIFAT_LOCATIONS_COUNT; ++locationIndex) { uint32 sect = DIFAT[locationIndex]; if (sect == ENDOFCHAIN || sect == FREESECT) { // end of sector chain @@ -612,17 +619,8 @@ bool DOCFile::ParseVBAProject() bool DOCFile::ProcessData() { - BufferView bv = obj->GetData().GetEntireFile(); - - if (bv[0] == 0x50 && bv[1] == 0x4b) { - // zip archive - get the vbaProject.bin file if any - - } - - vbaProjectBuffer = bv; - + vbaProjectBuffer = obj->GetData().GetEntireFile(); CHECK(ParseVBAProject(), false, ""); - return true; } diff --git a/Types/DOC/src/PanelInformation.cpp b/Types/DOC/src/PanelInformation.cpp index 977c5474..bb56cb83 100644 --- a/Types/DOC/src/PanelInformation.cpp +++ b/Types/DOC/src/PanelInformation.cpp @@ -6,8 +6,9 @@ using namespace AppCUI::Controls; Panels::Information::Information(Reference _doc) : TabPage("&Information") { doc = _doc; - general = Factory::ListView::Create(this, "x:0,y:0,w:100%,h:10", { "n:Field,w:12", "n:Value,w:100" }, ListViewFlags::None); - headers = Factory::ListView::Create(this, "x:0,y:10,w:100%,h:20", { "n:Field,w:12", "n:Value,w:10000" }, ListViewFlags::None); + general = Factory::ListView::Create(this, "x:0,y:0,w:100%,h:5", { "n:Field,w:16", "n:Value,w:100" }, ListViewFlags::None); + compoundFileInfo = Factory::ListView::Create(this, "x:0,y:5,w:100%,h:10", { "n:Field,w:16", "n:Value,w:10000" }, ListViewFlags::None); + vbaStreamsInfo = Factory::ListView::Create(this, "x:0,y:15,w:100%,h:20", { "n:Field,w:16", "n:Value,w:10000" }, ListViewFlags::None); this->Update(); } @@ -23,6 +24,97 @@ void Panels::Information::UpdateGeneralInformation() auto value = tempStr.Format("%s bytes", sizeString); general->AddItem({ "Size", value }); } + + NumericFormatter nf; + + compoundFileInfo->AddItem("Compound file"); + vbaStreamsInfo->AddItem({ "Project name", doc->projectName }); + compoundFileInfo->AddItem({ "Minor version", nf.ToString(doc->cfMinorVersion, hex) }); + compoundFileInfo->AddItem({ "Major version", nf.ToString(doc->cfMajorVersion, hex) }); + compoundFileInfo->AddItem({ "Transaction signature number", nf.ToString(doc->transactionSignatureNumber, dec) }); + compoundFileInfo->AddItem({ "FAT sectors count", nf.ToString(doc->numberOfFatSectors, dec) }); + compoundFileInfo->AddItem({ "MiniFAT sectors count", nf.ToString(doc->numberOfMiniFatSectors, dec) }); + compoundFileInfo->AddItem({ "DIFAT sectors count", nf.ToString(doc->numberOfDifatSectors, dec) }); + compoundFileInfo->AddItem({ "First directory sector", nf.ToString(doc->firstDirectorySectorLocation, hex) }); + compoundFileInfo->AddItem({ "First MiniFAT sector", nf.ToString(doc->firstMiniFatSectorLocation, hex) }); + compoundFileInfo->AddItem({ "First DIFAT sector", nf.ToString(doc->firstDifatSectorLocation, hex) }); + + vbaStreamsInfo->AddItem("VBA streams"); + vbaStreamsInfo->AddItem({ "Major version", nf.ToString(doc->dirMajorVersion, hex) }); + vbaStreamsInfo->AddItem({ "Minor version", nf.ToString(doc->dirMinorVersion, hex) }); + vbaStreamsInfo->AddItem({ "Modules path", doc->modulesPath }); + + switch (doc->sysKind) { + case Win16Bit: + vbaStreamsInfo->AddItem({ "System kind", "Win16Bit" }); + break; + case Win32Bit: + vbaStreamsInfo->AddItem({ "System kind", "Win32Bit" }); + break; + case Macintosh: + vbaStreamsInfo->AddItem({ "System kind", "Macintosh" }); + break; + case Win64Bit: + vbaStreamsInfo->AddItem({ "System kind", "Win64Bit" }); + break; + default: + vbaStreamsInfo->AddItem({ "System kind", "Unknown" }); + break; + } + + vbaStreamsInfo->AddItem({ "Doc string", doc->docString }); + vbaStreamsInfo->AddItem({ "Help file", doc->helpFile }); + vbaStreamsInfo->AddItem({ "Constants", doc->constants }); + vbaStreamsInfo->AddItem({ "Modules count", nf.ToString(doc->modulesCount, dec) }); + + LocalString<256> header; + uint32 index; + + index = 0; + for (const auto& record : doc->referenceControlRecords) { + vbaStreamsInfo->AddItem(""); + header.Format("Reference control record #%s", nf.ToString(index++, dec).data()); + vbaStreamsInfo->AddItem(header); + + vbaStreamsInfo->AddItem({ "Libid twiddled", record.libidTwiddled }); + vbaStreamsInfo->AddItem({ "Name record extended", record.nameRecordExtended }); + vbaStreamsInfo->AddItem({ "Libid extended", record.libidExtended }); + vbaStreamsInfo->AddItem({ "Cookie", nf.ToString(record.cookie, hex) }); + } + + index = 0; + for (const auto& record : doc->referenceOriginalRecords) { + vbaStreamsInfo->AddItem(""); + header.Format("Reference original record #%s", nf.ToString(index++, dec).data()); + vbaStreamsInfo->AddItem(header); + + vbaStreamsInfo->AddItem({ "Libid original", record.libidOriginal }); + vbaStreamsInfo->AddItem({ "Libid twiddled", record.referenceControl.libidTwiddled }); + vbaStreamsInfo->AddItem({ "Name record extended", record.referenceControl.nameRecordExtended }); + vbaStreamsInfo->AddItem({ "Libid extended", record.referenceControl.libidExtended }); + vbaStreamsInfo->AddItem({ "Cookie", nf.ToString(record.referenceControl.cookie, hex) }); + } + + index = 0; + for (const auto& record : doc->referenceRegisteredRecords) { + vbaStreamsInfo->AddItem(""); + header.Format("Reference registered record #%s", nf.ToString(index++, dec).data()); + vbaStreamsInfo->AddItem(header); + + vbaStreamsInfo->AddItem({ "Libid", record.libid }); + } + + index = 0; + for (const auto& record : doc->referenceProjectRecords) { + vbaStreamsInfo->AddItem(""); + header.Format("Reference absolute record #%s", nf.ToString(index++, dec).data()); + vbaStreamsInfo->AddItem(header); + + vbaStreamsInfo->AddItem({ "Libid absolute", record.libidAbsolute }); + vbaStreamsInfo->AddItem({ "Libid relative", record.libidRelative }); + vbaStreamsInfo->AddItem({ "Major version", nf.ToString(record.majorVersion, hex) }); + vbaStreamsInfo->AddItem({ "Minor version", nf.ToString(record.minorVersion, hex) }); + } } void Panels::Information::UpdateIssues() diff --git a/Types/DOC/src/doc.cpp b/Types/DOC/src/doc.cpp index df7536c9..4897656b 100644 --- a/Types/DOC/src/doc.cpp +++ b/Types/DOC/src/doc.cpp @@ -34,6 +34,13 @@ void CreateContainerView(Reference win, Reference< { ContainerViewer::Settings settings; + const auto hex = NumericFormat{ NumericFormatFlags::HexPrefix, 16 }; + + NumericFormatter nf; // should not use the same numerical formatter for multiple operations, but AddProperty owns the given string so it's fine + settings.AddProperty("Sector size", nf.ToString(doc->sectorSize, hex)); + settings.AddProperty("Mini sector size", nf.ToString(doc->miniSectorSize, hex)); + settings.AddProperty("Mini stream cutoff", nf.ToString(doc->miniStreamCutoffSize, hex)); + settings.SetIcon(DOC_ICON); settings.SetColumns({ "n:&Module name,a:l,w:30", @@ -74,9 +81,8 @@ PLUGIN_EXPORT bool PopulateWindow(Reference win) PLUGIN_EXPORT void UpdateSettings(IniSection sect) { sect["Pattern"] = "magic:D0 CF 11 E0 A1 B1 1A E1"; - sect["Extension"] = { "docx", "docm", "xslx", "xslm", "pptx", "pptm" }; sect["Priority"] = 1; - sect["Description"] = "Office file (*.docx, *.xslx, *.pptx) / vbaProject.bin compound file"; + sect["Description"] = "Compound file containing VBA macros (vbaProject.bin)"; } } From e2aecbcdf6b5572ad82660a2a58b6a961cbc82f6 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Wed, 5 Jun 2024 19:41:04 +0300 Subject: [PATCH 12/20] Fix decompress bug for multiple chunks --- Types/DOC/src/DOCFile.cpp | 48 ++++++++++++++++++++------------------- Types/VBA/src/VBAFile.cpp | 4 +--- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 89742a1b..840cd933 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -46,6 +46,7 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) // Token Sequence series size_t end = chunkStartIndex + chunkLength + 3; + size_t decompressedChunkStart = decompressed.GetLength(); while (index < end) { unsigned char flags = bv[index++]; for (int i = 0; i < 8; ++i) { @@ -56,7 +57,7 @@ bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) if (flags & 0x01) { // 2 bytes (Copy Token) - int offsetBits = ceil(log2(decompressed.GetLength())); // number of bits used for the offset value + int offsetBits = ceil(log2(decompressed.GetLength() - decompressedChunkStart)); // number of bits used for the offset value if (offsetBits < 4) { offsetBits = 4; @@ -123,7 +124,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x03, false, "projectcodepage_id"); CHECK(stream.ReadAs() == 0x02, false, "projectcodepage_size"); - auto codePage = stream.ReadAs(); // TODO: what to do with the codec? + auto codePage = stream.ReadAs(); CHECK(stream.ReadAs() == 0x04, false, "projectname_id"); auto projectName_size = stream.ReadAs(); @@ -133,12 +134,12 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x05, false, "projectdocstring_id"); auto projectDocString_size = stream.ReadAs(); CHECK(projectDocString_size <= 2000, false, "projectdocstring_size"); - docString = String(stream.Read(projectDocString_size)); // TODO: decode + docString = String(stream.Read(projectDocString_size)); CHECK(stream.ReadAs() == 0x40, false, "reserved"); auto projectDocStringUnicode_size = stream.ReadAs(); CHECK(projectDocStringUnicode_size % 2 == 0, false, "projectDocStringUnicode_size"); - UnicodeStringBuilder projectDocStringUnicode(stream.Read(projectDocStringUnicode_size)); // TODO: decode + UnicodeStringBuilder projectDocStringUnicode(stream.Read(projectDocStringUnicode_size)); CHECK(stream.ReadAs() == 0x06, false, "helpFile1_id"); auto helpFile1_size = stream.ReadAs(); @@ -171,12 +172,12 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) auto projectConstants_size = stream.ReadAs(); CHECK(projectConstants_size <= 1015, false, "projectConstants_size"); - constants = String(stream.Read(projectConstants_size)); // TODO: decode and ABNF + constants = String(stream.Read(projectConstants_size)); CHECK(stream.ReadAs() == 0x3c, false, "reserved"); auto projectConstantsUnicode_size = stream.ReadAs(); CHECK(projectConstantsUnicode_size % 2 == 0, false, "projectConstantsUnicode_size"); - UnicodeStringBuilder constantsUnicode(stream.Read(projectConstantsUnicode_size)); // TODO: decode and ABNF + UnicodeStringBuilder constantsUnicode(stream.Read(projectConstantsUnicode_size)); uint32 recordIndex = 0; @@ -191,7 +192,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(referenceName_id == 0x16, false, "referenceName_id"); auto referenceName_size = stream.ReadAs(); - String referenceName(stream.Read(referenceName_size)); // TODO: decode and ABNF + String referenceName(stream.Read(referenceName_size)); CHECK(stream.ReadAs() == 0x3e, false, "reserved"); auto referenceNameUnicode_size = stream.ReadAs(); UnicodeStringBuilder referenceNameUnicode(stream.Read(referenceNameUnicode_size)); @@ -208,7 +209,6 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) stream.Seek(sizeof(uint32)); // SizeTwiddled auto sizeOfLibidTwiddled = stream.ReadAs(); - // TODO: check string - https://learn.microsoft.com/en-us/openspecs/office_file_formats/ms-ovba/d64485fa-8562-4726-9c5e-11e8f01a81c0 record.libidTwiddled = String(stream.Read(sizeOfLibidTwiddled)); CHECK(stream.ReadAs() == 0x00, false, "reserved1"); CHECK(stream.ReadAs() == 0x00, false, "reserved2"); @@ -218,7 +218,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) if (check == 0x16) { // optional NameRecordExtended auto sizeOfName = stream.ReadAs(); - record.nameRecordExtended = String(stream.Read(sizeOfName)); // TODO: decode and ABNF + record.nameRecordExtended = String(stream.Read(sizeOfName)); CHECK(stream.ReadAs() == 0x3e, false, "reserved"); auto sizeOfNameUnicode = stream.ReadAs(); UnicodeStringBuilder nameUnicode(stream.Read(sizeOfNameUnicode)); @@ -228,7 +228,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(check == 0x30, false, "reserved3"); stream.Seek(sizeof(uint32)); // SizeExtended auto sizeOfLibidExtended = stream.ReadAs(); - record.libidExtended = String(stream.Read(sizeOfLibidExtended)); // TODO: decode and ABNF + record.libidExtended = String(stream.Read(sizeOfLibidExtended)); CHECK(stream.ReadAs() == 0x00, false, "reserved4"); CHECK(stream.ReadAs() == 0x00, false, "reserved5"); record.originalTypeLib = BufferView(stream.Read(16)); @@ -243,7 +243,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) record.recordIndex = recordIndex; auto sizeOfLibidOriginal = stream.ReadAs(); - record.libidOriginal = String(stream.Read(sizeOfLibidOriginal)); // TODO: decode and ABNF + record.libidOriginal = String(stream.Read(sizeOfLibidOriginal)); CHECK(stream.ReadAs() == 0x2f, false, "referenceControl_id"); stream.Seek(sizeof(uint32)); // SizeTwiddled @@ -257,7 +257,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) if (check == 0x16) { // optional NameRecordExtended auto sizeOfName = stream.ReadAs(); - record.referenceControl.nameRecordExtended = String(stream.Read(sizeOfName)); // TODO: decode and ABNF + record.referenceControl.nameRecordExtended = String(stream.Read(sizeOfName)); CHECK(stream.ReadAs() == 0x3e, false, "reserved"); auto sizeOfNameUnicode = stream.ReadAs(); UnicodeStringBuilder nameUnicode(stream.Read(sizeOfNameUnicode)); @@ -267,7 +267,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(check == 0x30, false, "reserved3"); stream.Seek(sizeof(uint32)); // SizeExtended auto sizeOfLibidExtended = stream.ReadAs(); - record.referenceControl.libidExtended = String(stream.Read(sizeOfLibidExtended)); // TODO: decode and ABNF + record.referenceControl.libidExtended = String(stream.Read(sizeOfLibidExtended)); CHECK(stream.ReadAs() == 0x00, false, "reserved4"); CHECK(stream.ReadAs() == 0x00, false, "reserved5"); record.referenceControl.originalTypeLib = BufferView(stream.Read(16)); @@ -284,7 +284,7 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) stream.Seek(sizeof(uint32)); // ignored Size auto sizeOfLibid = stream.ReadAs(); - record.libid = String(stream.Read(sizeOfLibid)); // TODO: decode and ABNF + record.libid = String(stream.Read(sizeOfLibid)); CHECK(stream.ReadAs() == 0x00, false, "reserved1"); CHECK(stream.ReadAs() == 0x00, false, "reserved2"); @@ -299,9 +299,9 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) stream.Seek(sizeof(uint32)); // ignored Size auto sizeOfLibidAbsolute = stream.ReadAs(); - record.libidAbsolute = String(stream.Read(sizeOfLibidAbsolute)); // TODO: decode and ABNF + record.libidAbsolute = String(stream.Read(sizeOfLibidAbsolute)); auto sizeOfLibidRelative = stream.ReadAs(); - record.libidRelative = String(stream.Read(sizeOfLibidRelative)); // TODO: decode and ABNF + record.libidRelative = String(stream.Read(sizeOfLibidRelative)); record.majorVersion = stream.ReadAs(); record.minorVersion = stream.ReadAs(); @@ -331,26 +331,26 @@ bool DOCFile::ParseUncompressedDirStream(BufferView bv) CHECK(stream.ReadAs() == 0x19, false, "moduleName_id"); auto sizeOfModuleName = stream.ReadAs(); - // TODO: decode and ABNF + moduleRecord.moduleName = String(stream.Read(sizeOfModuleName)); CHECK(stream.ReadAs() == 0x47, false, "moduleNameUnicode_id"); auto sizeOfModuleNameUnicode = stream.ReadAs(); CHECK(sizeOfModuleNameUnicode % 2 == 0, false, "sizeOfModuleNameUnicode"); - UnicodeStringBuilder moduleNameUnicode(stream.Read(sizeOfModuleNameUnicode)); // TODO: decode and ABNF + UnicodeStringBuilder moduleNameUnicode(stream.Read(sizeOfModuleNameUnicode)); CHECK(stream.ReadAs() == 0x1a, false, "moduleStreamName_id"); auto sizeOfStreamName = stream.ReadAs(); - moduleRecord.streamName = String(stream.Read(sizeOfStreamName)); // TODO: decode and ABNF + moduleRecord.streamName = String(stream.Read(sizeOfStreamName)); CHECK(stream.ReadAs() == 0x32, false, "reserved"); auto sizeOfStreamNameUnicode = stream.ReadAs(); CHECK(sizeOfStreamNameUnicode % 2 == 0, false, "sizeOfStreamNameUnicode"); - String streamNameUnicode(stream.Read(sizeOfStreamNameUnicode)); // TODO: decode and ABNF + String streamNameUnicode(stream.Read(sizeOfStreamNameUnicode)); CHECK(stream.ReadAs() == 0x1c, false, "moduleDocString_id"); auto sizeOfDocString = stream.ReadAs(); - // TODO: decode and ABNF + moduleRecord.docString = String(stream.Read(sizeOfDocString)); CHECK(stream.ReadAs() == 0x48, false, "reserved"); auto sizeOfDocStringUnicode = stream.ReadAs(); @@ -404,7 +404,7 @@ bool DOCFile::ParseModuleStream(BufferView bv, const MODULE_Record& moduleRecord ByteStream stream(bv); stream.Seek(moduleTextOffset); auto compressed = stream.Read(stream.GetSize() - stream.GetCursor()); - DecompressStream(compressed, text); + CHECK(DecompressStream(compressed, text), false, "decompress"); return true; } @@ -664,7 +664,9 @@ void DOCFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte Buffer moduleBuffer = OpenCFStream(moduleEntry); Buffer decompressed; - ParseModuleStream(moduleBuffer, moduleRecord, decompressed); + if (!ParseModuleStream(moduleBuffer, moduleRecord, decompressed)) { + AppCUI::Dialogs::MessageBox::ShowError("Error", "Module parse error!"); + } GView::App::OpenBuffer(decompressed, moduleRecord->streamName, "", GView::App::OpenMethod::ForceType, "VBA"); } } // namespace GView::Type::DOC diff --git a/Types/VBA/src/VBAFile.cpp b/Types/VBA/src/VBAFile.cpp index 684623a3..c07ad6f1 100644 --- a/Types/VBA/src/VBAFile.cpp +++ b/Types/VBA/src/VBAFile.cpp @@ -31,7 +31,7 @@ UnicodeStringBuilder KEYWORDS[] = { UnicodeStringBuilder("Attribute"), UnicodeSt UnicodeStringBuilder KEYWORDS2[] = { UnicodeStringBuilder("True"), UnicodeStringBuilder("False") }; -const char operators[] = "=(),._&$+-*/<>#"; +const char operators[] = "=(),._&$+-*/<>#:"; void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) { @@ -40,7 +40,6 @@ void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) TokenAlignament presetAlignament = TokenAlignament::None; - while (start < syntax.text.Len()) { auto c = syntax.text[start]; @@ -123,7 +122,6 @@ void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) start = syntax.text.ParseUntillStartOfNextLine(end); continue; } - break; } } From f4e0cb0c39b6ccaf3b3b78fafae9860bf36aac5b Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Thu, 6 Jun 2024 14:30:46 +0300 Subject: [PATCH 13/20] Fix base64 trailing bytes & zip issue when opening archive from DataCache --- GViewCore/src/Unpack/Base64.cpp | 6 ++++++ GViewCore/src/ZIP/zip.cpp | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) diff --git a/GViewCore/src/Unpack/Base64.cpp b/GViewCore/src/Unpack/Base64.cpp index 949c104d..8add8abf 100644 --- a/GViewCore/src/Unpack/Base64.cpp +++ b/GViewCore/src/Unpack/Base64.cpp @@ -49,6 +49,7 @@ bool Decode(BufferView view, Buffer& output) uint32 sequence = 0; uint32 sequenceIndex = 0; char lastEncoded = 0; + uint8 paddingCount = 0; for (uint32 i = 0; i < view.GetLength(); ++i) { @@ -69,6 +70,7 @@ bool Decode(BufferView view, Buffer& output) if (encoded == '=') { // padding decoded = 0; + paddingCount++; } else { decoded = BASE64_DECODE_TABLE[encoded]; CHECK(decoded != -1, false, ""); @@ -90,6 +92,10 @@ bool Decode(BufferView view, Buffer& output) lastEncoded = encoded; } + // trim the trailing bytes + CHECK(paddingCount < 3, false, ""); + output.Resize(output.GetLength() - paddingCount); + return true; } } diff --git a/GViewCore/src/ZIP/zip.cpp b/GViewCore/src/ZIP/zip.cpp index 95fb42e3..9440cad6 100644 --- a/GViewCore/src/ZIP/zip.cpp +++ b/GViewCore/src/ZIP/zip.cpp @@ -452,9 +452,41 @@ bool GetInfo(Utils::DataCache& cache, Info& info) CHECKBK(mz_zip_reader_entry_get_info(internalInfo->reader.value, &zipFile) == MZ_OK, ""); mz_zip_reader_set_pattern(internalInfo->reader.value, nullptr, 1); // do we need a pattern? + size_t entryIndex = internalInfo->entries.size(); auto& entry = internalInfo->entries.emplace_back(); + ConvertZipFileInfoToEntry(zipFile, entry); + std::u8string_view filename = entry.filename; + if (entry.type == EntryType::Directory && filename[filename.size() - 1] == '/') { + filename = { filename.data(), filename.size() - 1 }; + } + + size_t offset = 0; + + while (true) { + size_t pos = filename.find_first_of('/', offset); + + CHECKBK(pos != std::string::npos, ""); + + // add the parent as well if not already present + auto& entry = internalInfo->entries[entryIndex]; + auto parentFilename = entry.filename.substr(0, pos + 1); + + auto it = std::find_if( + internalInfo->entries.begin(), internalInfo->entries.end(), [&](const _Entry& e) -> bool { return e.filename == parentFilename; }); + if (it == internalInfo->entries.end()) { + auto& parentEntry = internalInfo->entries.emplace_back(); + parentEntry.filename = parentFilename; + parentEntry.filename_size = parentFilename.size(); + parentEntry.type = EntryType::Directory; + parentEntry.version_madeby = entry.version_madeby; + parentEntry.version_needed = entry.version_needed; + } + + offset = pos + 1; + } + CHECKBK(mz_zip_reader_goto_next_entry(internalInfo->reader.value) == MZ_OK, ""); } while (true); From 877bf8daf308d1f8679e3fcd78fc2b87db7ed10e Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sun, 9 Jun 2024 00:09:07 +0300 Subject: [PATCH 14/20] Add DocString to VBA module --- Types/DOC/include/doc.hpp | 4 ++-- Types/DOC/src/DOCFile.cpp | 7 +++++++ Types/DOC/src/doc.cpp | 1 + 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index fc8e2353..d03160e2 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -40,7 +40,8 @@ namespace Type ByteStream& Seek(size_t count); - size_t GetCursor() { + size_t GetCursor() + { return cursor; }; @@ -119,7 +120,6 @@ namespace Type uint16 minorVersion; }; - // TODO: add docstring to items in view struct MODULE_Record { String moduleName; String streamName; diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 840cd933..934a499e 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -1,5 +1,7 @@ #include "doc.hpp" +#include // TODO: remove + namespace GView::Type::DOC { using namespace GView::View::LexicalViewer; @@ -647,6 +649,8 @@ bool DOCFile::PopulateItem(AppCUI::Controls::TreeViewItem item) item.SetText(2, String().Format("%u", decompressed.GetLength())); + item.SetText(3, moduleRecord.docString); + item.SetData(&moduleRecord); moduleRecordIndex++; @@ -668,5 +672,8 @@ void DOCFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte AppCUI::Dialogs::MessageBox::ShowError("Error", "Module parse error!"); } GView::App::OpenBuffer(decompressed, moduleRecord->streamName, "", GView::App::OpenMethod::ForceType, "VBA"); + + std::ofstream out("D:\\work\\bd\\samples\\ceva\\docx\\dropped", std::ios::trunc); + out.write((const char*) decompressed.GetData(), decompressed.GetLength()); } } // namespace GView::Type::DOC diff --git a/Types/DOC/src/doc.cpp b/Types/DOC/src/doc.cpp index 4897656b..9b72eaf5 100644 --- a/Types/DOC/src/doc.cpp +++ b/Types/DOC/src/doc.cpp @@ -46,6 +46,7 @@ void CreateContainerView(Reference win, Reference< "n:&Module name,a:l,w:30", "n:&Stream name,a:c,w:40", "n:&Size,a:c,w:15", + "n:&Doc String,a:c,w:100", }); settings.SetEnumerateCallback(win->GetObject()->GetContentType().ToObjectRef()); From 69644f05b16cdf531c27cd185c2ecb3a799de971 Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Sun, 9 Jun 2024 12:18:31 +0300 Subject: [PATCH 15/20] Patch VBA parsing --- Types/DOC/src/DOCFile.cpp | 3 --- Types/VBA/src/VBAFile.cpp | 6 ++++-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 934a499e..90e09c6c 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -672,8 +672,5 @@ void DOCFile::OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewIte AppCUI::Dialogs::MessageBox::ShowError("Error", "Module parse error!"); } GView::App::OpenBuffer(decompressed, moduleRecord->streamName, "", GView::App::OpenMethod::ForceType, "VBA"); - - std::ofstream out("D:\\work\\bd\\samples\\ceva\\docx\\dropped", std::ios::trunc); - out.write((const char*) decompressed.GetData(), decompressed.GetLength()); } } // namespace GView::Type::DOC diff --git a/Types/VBA/src/VBAFile.cpp b/Types/VBA/src/VBAFile.cpp index c07ad6f1..1c0ed3b9 100644 --- a/Types/VBA/src/VBAFile.cpp +++ b/Types/VBA/src/VBAFile.cpp @@ -25,13 +25,15 @@ uint32 ParseString(GView::View::LexicalViewer::TextParser text, uint32 index) return end + 1; } -UnicodeStringBuilder KEYWORDS[] = { UnicodeStringBuilder("Attribute"), UnicodeStringBuilder("Sub"), UnicodeStringBuilder("Private"), UnicodeStringBuilder("As"), UnicodeStringBuilder("Dim"), UnicodeStringBuilder("End"), +UnicodeStringBuilder KEYWORDS[] = { UnicodeStringBuilder("Attribute"), UnicodeStringBuilder("Sub"), UnicodeStringBuilder("Private"), + UnicodeStringBuilder("Public"), UnicodeStringBuilder("As"), UnicodeStringBuilder("Dim"), + UnicodeStringBuilder("End"), UnicodeStringBuilder("Const"), UnicodeStringBuilder("ByVal"), UnicodeStringBuilder("Set"), UnicodeStringBuilder("While"), UnicodeStringBuilder("Wend"), UnicodeStringBuilder("If"), UnicodeStringBuilder("Then") }; UnicodeStringBuilder KEYWORDS2[] = { UnicodeStringBuilder("True"), UnicodeStringBuilder("False") }; -const char operators[] = "=(),._&$+-*/<>#:"; +const char operators[] = "=(),._&$+-*/<>#\\:"; void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) { From a6cba1cbc332456edbcba241635b75ad6807632a Mon Sep 17 00:00:00 2001 From: Cosmin765 Date: Mon, 10 Jun 2024 14:45:59 +0300 Subject: [PATCH 16/20] Small refactoring --- Types/DOC/include/doc.hpp | 4 ++-- Types/DOC/src/CFDirEntry.cpp | 2 +- Types/DOC/src/DOCFile.cpp | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index d03160e2..18de32ed 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -33,7 +33,7 @@ namespace Type if (cursor + count > size) { count = size - cursor; } - T value = *(T*) ((uint8*) ptr + cursor); + T value = *(T*) ((uint8*) ptr + cursor); cursor += count; return value; } @@ -53,7 +53,7 @@ namespace Type #pragma pack(1) struct CFDirEntry_Data { - uint8 nameUnicode[64]; // the structure starts from here + uint8 nameUnicode[64]; uint16 nameLength; uint8 objectType; uint8 colorFlag; // 0x00 (red) or 0x01 (black) diff --git a/Types/DOC/src/CFDirEntry.cpp b/Types/DOC/src/CFDirEntry.cpp index cbdcdd69..37858b5a 100644 --- a/Types/DOC/src/CFDirEntry.cpp +++ b/Types/DOC/src/CFDirEntry.cpp @@ -39,7 +39,7 @@ bool CFDirEntry::Load(BufferView _directoryData, uint32 _entryId) directoryData = _directoryData; entryId = _entryId; - data = ByteStream(directoryData).Seek(entryId * 128).ReadAs(); + data = ByteStream(directoryData).Seek(entryId * sizeof(CFDirEntry_Data)).ReadAs(); CHECK(data.nameLength % 2 == 0, false, "nameLength"); CHECK(data.objectType == 0x00 || data.objectType == 0x01 || data.objectType == 0x02 || data.objectType == 0x05, false, "objectType"); diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 90e09c6c..d29c3136 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -647,6 +647,8 @@ bool DOCFile::PopulateItem(AppCUI::Controls::TreeViewItem item) Buffer decompressed; ParseModuleStream(moduleBuffer, moduleRecord, decompressed); + // TODO: add the creation time and modified time of the module stream + item.SetText(2, String().Format("%u", decompressed.GetLength())); item.SetText(3, moduleRecord.docString); From 88a20f50626d5d1edd6880545a27d6b926ef9c07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gheorghi=C8=9B=C4=83=20Mutu?= Date: Fri, 26 Jul 2024 22:40:00 +0300 Subject: [PATCH 17/20] [EML][DOC] # fix build (merge issues) --- Types/DOC/include/doc.hpp | 508 +++++++++++++++++------------------ Types/DOC/src/ByteStream.cpp | 9 +- Types/DOC/src/CFDirEntry.cpp | 10 +- Types/DOC/src/CMakeLists.txt | 4 +- Types/DOC/src/DOCFile.cpp | 2 - Types/VBA/src/VBAFile.cpp | 6 +- 6 files changed, 263 insertions(+), 276 deletions(-) diff --git a/Types/DOC/include/doc.hpp b/Types/DOC/include/doc.hpp index 18de32ed..4f35c56c 100644 --- a/Types/DOC/include/doc.hpp +++ b/Types/DOC/include/doc.hpp @@ -4,267 +4,259 @@ #define NOSTREAM 0xffffffff - -namespace GView +namespace GView::Type::DOC +{ +namespace Panels { -namespace Type + class Information; +} + +class ByteStream { - namespace DOC + private: + void* ptr; + size_t size; + size_t cursor; + + public: + ByteStream(void* ptr, size_t size) : ptr(ptr), size(size), cursor(0){}; + ByteStream(BufferView view) : ptr((void*) view.GetData()), size(view.GetLength()), cursor(0){}; + + BufferView Read(size_t count); + template + T ReadAs() { - namespace Panels - { - class Information; + size_t count = sizeof(T); + if (cursor + count > size) { + count = size - cursor; } + T value = *(T*) ((uint8*) ptr + cursor); + cursor += count; + return value; + } - class ByteStream - { - private: - void* ptr; - size_t size; - size_t cursor; - - public: - ByteStream(void* ptr, size_t size) : ptr(ptr), size(size), cursor(0) {}; - ByteStream(BufferView view) : ptr((void*) view.GetData()), size(view.GetLength()), cursor(0) {}; - - BufferView Read(size_t count); - template T ReadAs() { - size_t count = sizeof(T); - if (cursor + count > size) { - count = size - cursor; - } - T value = *(T*) ((uint8*) ptr + cursor); - cursor += count; - return value; - } - - ByteStream& Seek(size_t count); - - size_t GetCursor() - { - return cursor; - }; - - size_t GetSize() - { - return size; - } - }; - - #pragma pack(1) - struct CFDirEntry_Data { - uint8 nameUnicode[64]; - uint16 nameLength; - uint8 objectType; - uint8 colorFlag; // 0x00 (red) or 0x01 (black) - uint32 leftSiblingId; - uint32 rightSiblingId; - uint32 childId; - uint8 clsid[16]; - uint32 stateBits; - uint64 creationTime; - uint64 modifiedTime; - uint32 startingSectorLocation; - uint64 streamSize; - }; - - class CFDirEntry - { - private: - void AppendChildren(uint32 childId); - - public: - CFDirEntry(); - CFDirEntry(BufferView _directoryData, uint32 _entryId); - - bool Load(BufferView _directoryData, uint32 _entryId); - void BuildStorageTree(); - bool FindChildByName(std::u16string_view entryName, CFDirEntry& entry); - - private: - BufferView directoryData; - bool initialized = false; - - public: - uint32 entryId{}; - CFDirEntry_Data data{}; - std::vector children; - }; - - // REFERENCE records - struct REFERENCECONTROL_Record { - uint32 recordIndex; - String libidTwiddled; - String nameRecordExtended; - String libidExtended; - BufferView originalTypeLib; - uint32 cookie; - }; - - struct REFERENCEORIGINAL_Record { - uint32 recordIndex; - String libidOriginal; - REFERENCECONTROL_Record referenceControl; - }; - - struct REFERENCEREGISTERED_Record { - uint32 recordIndex; - String libid; - }; - - struct REFERENCEPROJECT_Record { - uint32 recordIndex; - String libidAbsolute; - String libidRelative; - uint32 majorVersion; - uint16 minorVersion; - }; - - struct MODULE_Record { - String moduleName; - String streamName; - String docString; - uint32 textOffset; - uint32 helpContext; - }; - - enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; - - - class DOCFile : public TypeInterface, public View::ContainerViewer::EnumerateInterface, public View::ContainerViewer::OpenItemInterface - { - private: - constexpr static uint8 CF_HEADER_SIGNATURE[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; - constexpr static size_t DIFAT_LOCATIONS_COUNT = 109; - - friend class Panels::Information; - - // displayed info about the file - uint16 cfMinorVersion; - uint16 cfMajorVersion; - uint32 transactionSignatureNumber; - uint32 numberOfFatSectors; - uint32 numberOfMiniFatSectors; - uint32 numberOfDifatSectors; - uint32 firstDirectorySectorLocation; - uint32 firstMiniFatSectorLocation; - uint32 firstDifatSectorLocation; - - uint32 dirMajorVersion; - uint16 dirMinorVersion; - SysKind sysKind; - String projectName; - String docString; - String helpFile; - String constants; - uint16 modulesCount; - - // compound files (vbaProject.bin) helper member variables - AppCUI::Utils::Buffer vbaProjectBuffer; - AppCUI::Utils::Buffer FAT; - AppCUI::Utils::Buffer miniStream; - AppCUI::Utils::Buffer miniFAT; - - public: - uint16 sectorSize{}; - uint16 miniSectorSize{}; - uint16 miniStreamCutoffSize{}; - - private: - std::u16string modulesPath; - CFDirEntry root; - - // VBA streams helper member variables - std::vector referenceControlRecords; - std::vector referenceOriginalRecords; - std::vector referenceRegisteredRecords; - std::vector referenceProjectRecords; - - std::vector moduleRecords; - uint32 moduleRecordIndex = 0; - - public: - DOCFile(); - virtual ~DOCFile() override - { - } - - virtual std::string_view GetTypeName() override - { - return "DOC"; - } - virtual void RunCommand(std::string_view command) override - { - // here - } - - public: - - bool ProcessData(); - Reference selectionZoneInterface; - - uint32 GetSelectionZonesCount() override - { - CHECK(selectionZoneInterface.IsValid(), 0, ""); - return selectionZoneInterface->GetSelectionZonesCount(); - } - - TypeInterface::SelectionZone GetSelectionZone(uint32 index) override - { - static auto d = TypeInterface::SelectionZone{ 0, 0 }; - CHECK(selectionZoneInterface.IsValid(), d, ""); - CHECK(index < selectionZoneInterface->GetSelectionZonesCount(), d, ""); - - return selectionZoneInterface->GetSelectionZone(index); - } - - // View::ContainerViewer::EnumerateInterface - virtual bool BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) override; - virtual bool PopulateItem(AppCUI::Controls::TreeViewItem item) override; - - // View::ContainerViewer::OpenItemInterface - virtual void OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) override; - - // compound files (vbaProject.bin) helper methods - bool ParseVBAProject(); - Buffer OpenCFStream(const CFDirEntry& entry); - Buffer OpenCFStream(uint32 sect, uint32 size, bool useMiniFAT); - void DisplayAllVBAProjectFiles(CFDirEntry& entry); - - // VBA streams helper methods - bool DecompressStream(BufferView bv, Buffer& decompressed); - bool ParseUncompressedDirStream(BufferView bv); - bool ParseModuleStream(BufferView bv, const MODULE_Record& moduleRecord, Buffer& text); - bool FindModulesPath(const CFDirEntry& entry, UnicodeStringBuilder& path); - }; - - namespace Panels + ByteStream& Seek(size_t count); + + size_t GetCursor() + { + return cursor; + }; + + size_t GetSize() + { + return size; + } +}; + +#pragma pack(1) +struct CFDirEntry_Data { + uint8 nameUnicode[64]; + uint16 nameLength; + uint8 objectType; + uint8 colorFlag; // 0x00 (red) or 0x01 (black) + uint32 leftSiblingId; + uint32 rightSiblingId; + uint32 childId; + uint8 clsid[16]; + uint32 stateBits; + uint64 creationTime; + uint64 modifiedTime; + uint32 startingSectorLocation; + uint64 streamSize; +}; + +class CFDirEntry +{ + private: + void AppendChildren(uint32 childId); + + public: + CFDirEntry(); + CFDirEntry(BufferView _directoryData, uint32 _entryId); + + bool Load(BufferView _directoryData, uint32 _entryId); + void BuildStorageTree(); + bool FindChildByName(std::u16string_view entryName, CFDirEntry& entry); + + private: + BufferView directoryData; + bool initialized = false; + + public: + uint32 entryId{}; + CFDirEntry_Data data{}; + std::vector children; +}; + +// REFERENCE records +struct REFERENCECONTROL_Record { + uint32 recordIndex; + String libidTwiddled; + String nameRecordExtended; + String libidExtended; + BufferView originalTypeLib; + uint32 cookie; +}; + +struct REFERENCEORIGINAL_Record { + uint32 recordIndex; + String libidOriginal; + REFERENCECONTROL_Record referenceControl; +}; + +struct REFERENCEREGISTERED_Record { + uint32 recordIndex; + String libid; +}; + +struct REFERENCEPROJECT_Record { + uint32 recordIndex; + String libidAbsolute; + String libidRelative; + uint32 majorVersion; + uint16 minorVersion; +}; + +struct MODULE_Record { + String moduleName; + String streamName; + String docString; + uint32 textOffset; + uint32 helpContext; +}; + +enum SysKind { Win16Bit = 0, Win32Bit, Macintosh, Win64Bit }; + +class DOCFile : public TypeInterface, public View::ContainerViewer::EnumerateInterface, public View::ContainerViewer::OpenItemInterface +{ + private: + constexpr static uint8 CF_HEADER_SIGNATURE[] = { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 }; + constexpr static size_t DIFAT_LOCATIONS_COUNT = 109; + + friend class Panels::Information; + + // displayed info about the file + uint16 cfMinorVersion; + uint16 cfMajorVersion; + uint32 transactionSignatureNumber; + uint32 numberOfFatSectors; + uint32 numberOfMiniFatSectors; + uint32 numberOfDifatSectors; + uint32 firstDirectorySectorLocation; + uint32 firstMiniFatSectorLocation; + uint32 firstDifatSectorLocation; + + uint32 dirMajorVersion; + uint16 dirMinorVersion; + SysKind sysKind; + String projectName; + String docString; + String helpFile; + String constants; + uint16 modulesCount; + + // compound files (vbaProject.bin) helper member variables + AppCUI::Utils::Buffer vbaProjectBuffer; + AppCUI::Utils::Buffer FAT; + AppCUI::Utils::Buffer miniStream; + AppCUI::Utils::Buffer miniFAT; + + public: + uint16 sectorSize{}; + uint16 miniSectorSize{}; + uint16 miniStreamCutoffSize{}; + + private: + std::u16string modulesPath; + CFDirEntry root; + + // VBA streams helper member variables + std::vector referenceControlRecords; + std::vector referenceOriginalRecords; + std::vector referenceRegisteredRecords; + std::vector referenceProjectRecords; + + std::vector moduleRecords; + uint32 moduleRecordIndex = 0; + + public: + DOCFile(); + virtual ~DOCFile() override + { + } + + virtual std::string_view GetTypeName() override + { + return "DOC"; + } + virtual void RunCommand(std::string_view command) override + { + // here + } + + public: + bool ProcessData(); + Reference selectionZoneInterface; + + uint32 GetSelectionZonesCount() override + { + CHECK(selectionZoneInterface.IsValid(), 0, ""); + return selectionZoneInterface->GetSelectionZonesCount(); + } + + TypeInterface::SelectionZone GetSelectionZone(uint32 index) override + { + static auto d = TypeInterface::SelectionZone{ 0, 0 }; + CHECK(selectionZoneInterface.IsValid(), d, ""); + CHECK(index < selectionZoneInterface->GetSelectionZonesCount(), d, ""); + + return selectionZoneInterface->GetSelectionZone(index); + } + + // View::ContainerViewer::EnumerateInterface + virtual bool BeginIteration(std::u16string_view path, AppCUI::Controls::TreeViewItem parent) override; + virtual bool PopulateItem(AppCUI::Controls::TreeViewItem item) override; + + // View::ContainerViewer::OpenItemInterface + virtual void OnOpenItem(std::u16string_view path, AppCUI::Controls::TreeViewItem item) override; + + // compound files (vbaProject.bin) helper methods + bool ParseVBAProject(); + Buffer OpenCFStream(const CFDirEntry& entry); + Buffer OpenCFStream(uint32 sect, uint32 size, bool useMiniFAT); + void DisplayAllVBAProjectFiles(CFDirEntry& entry); + + // VBA streams helper methods + bool DecompressStream(BufferView bv, Buffer& decompressed); + bool ParseUncompressedDirStream(BufferView bv); + bool ParseModuleStream(BufferView bv, const MODULE_Record& moduleRecord, Buffer& text); + bool FindModulesPath(const CFDirEntry& entry, UnicodeStringBuilder& path); +}; + +namespace Panels +{ + class Information : public AppCUI::Controls::TabPage + { + Reference doc; + Reference general; + Reference compoundFileInfo; + Reference vbaStreamsInfo; + + inline static const auto dec = NumericFormat{ NumericFormatFlags::None, 10, 3, ',' }; + inline static const auto hex = NumericFormat{ NumericFormatFlags::HexPrefix, 16 }; + + void UpdateGeneralInformation(); + void UpdateIssues(); + void RecomputePanelsPositions(); + + public: + Information(Reference doc); + + void Update(); + virtual void OnAfterResize(int newWidth, int newHeight) override { - class Information : public AppCUI::Controls::TabPage - { - Reference doc; - Reference general; - Reference compoundFileInfo; - Reference vbaStreamsInfo; - - inline static const auto dec = NumericFormat{ NumericFormatFlags::None, 10, 3, ',' }; - inline static const auto hex = NumericFormat{ NumericFormatFlags::HexPrefix, 16 }; - - void UpdateGeneralInformation(); - void UpdateIssues(); - void RecomputePanelsPositions(); - - public: - Information(Reference doc); - - void Update(); - virtual void OnAfterResize(int newWidth, int newHeight) override - { - RecomputePanelsPositions(); - } - }; - }; // namespace Panels - - } // namespace DOC -} // namespace Type -} // namespace GView + RecomputePanelsPositions(); + } + }; +}; // namespace Panels +} // namespace GView::Type::DOC diff --git a/Types/DOC/src/ByteStream.cpp b/Types/DOC/src/ByteStream.cpp index e964a32c..858427b9 100644 --- a/Types/DOC/src/ByteStream.cpp +++ b/Types/DOC/src/ByteStream.cpp @@ -1,16 +1,14 @@ #include "doc.hpp" - -using namespace GView::Type::DOC; - - +namespace GView::Type::DOC +{ BufferView ByteStream::Read(size_t count) { if (cursor + count > size) { count = size - cursor; } - BufferView view((uint8*)ptr + cursor, count); + BufferView view((uint8*) ptr + cursor, count); cursor += count; return view; @@ -24,3 +22,4 @@ ByteStream& ByteStream::Seek(size_t count) cursor += count; return *this; } +} // namespace GView::Type::DOC diff --git a/Types/DOC/src/CFDirEntry.cpp b/Types/DOC/src/CFDirEntry.cpp index 37858b5a..a9776517 100644 --- a/Types/DOC/src/CFDirEntry.cpp +++ b/Types/DOC/src/CFDirEntry.cpp @@ -1,9 +1,7 @@ #include "doc.hpp" - -using namespace GView::Type::DOC; - - +namespace GView::Type::DOC +{ CFDirEntry::CFDirEntry() { } @@ -31,7 +29,6 @@ void CFDirEntry::AppendChildren(uint32 childId) children[childIndex] = child; }; - bool CFDirEntry::Load(BufferView _directoryData, uint32 _entryId) { CHECK(!initialized, false, "already initialized"); @@ -48,7 +45,6 @@ bool CFDirEntry::Load(BufferView _directoryData, uint32 _entryId) return true; } - void CFDirEntry::BuildStorageTree() { if (data.childId == NOSTREAM) { @@ -80,4 +76,4 @@ bool CFDirEntry::FindChildByName(std::u16string_view entryName, CFDirEntry& entr } return false; } - +} // namespace GView::Type::DOC diff --git a/Types/DOC/src/CMakeLists.txt b/Types/DOC/src/CMakeLists.txt index 99544f80..14d643bd 100644 --- a/Types/DOC/src/CMakeLists.txt +++ b/Types/DOC/src/CMakeLists.txt @@ -1,4 +1,6 @@ target_sources(DOC PRIVATE doc.cpp DOCFile.cpp - PanelInformation.cpp) \ No newline at end of file + PanelInformation.cpp + ByteStream.cpp + CFDirEntry.cpp) \ No newline at end of file diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index d29c3136..e1927b66 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -11,10 +11,8 @@ using namespace GView::View::LexicalViewer; #define FATSECT 0xfffffffd #define DIFSECT 0xfffffffc - DOCFile::DOCFile() { - } bool DOCFile::DecompressStream(BufferView bv, Buffer& decompressed) diff --git a/Types/VBA/src/VBAFile.cpp b/Types/VBA/src/VBAFile.cpp index 1c0ed3b9..8db15688 100644 --- a/Types/VBA/src/VBAFile.cpp +++ b/Types/VBA/src/VBAFile.cpp @@ -112,16 +112,16 @@ void VBAFile::AnalyzeText(GView::View::LexicalViewer::SyntaxManager& syntax) } if (c == '\r' || c == '\n') { - end = syntax.text.ParseUntillStartOfNextLine(start); + end = syntax.text.ParseUntilStartOfNextLine(start); presetAlignament = TokenAlignament::StartsOnNewLine; start = end; continue; } if (c == '\'') { - end = syntax.text.ParseUntillEndOfLine(start); + end = syntax.text.ParseUntilEndOfLine(start); syntax.tokens.Add(1, start, end, TokenColor::Comment, presetAlignament | TokenAlignament::NewLineAfter); - start = syntax.text.ParseUntillStartOfNextLine(end); + start = syntax.text.ParseUntilStartOfNextLine(end); continue; } break; From c84639ff7642e983aedd65e397ea37c8a1ffa40b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gheorghi=C8=9B=C4=83=20Mutu?= Date: Fri, 26 Jul 2024 23:08:37 +0300 Subject: [PATCH 18/20] [DOC] # fix ubuntu build (header for log2 and ceil functions) --- Types/DOC/src/DOCFile.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index e1927b66..42df0a29 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -1,6 +1,7 @@ #include "doc.hpp" #include // TODO: remove +#include namespace GView::Type::DOC { From 520a32ef276276dfeb1d4f0109c0f6e5a35824db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gheorghi=C8=9B=C4=83=20Mutu?= Date: Tue, 30 Jul 2024 20:35:16 +0300 Subject: [PATCH 19/20] [DOC] # fix CodeQL warnings --- Types/DOC/src/DOCFile.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Types/DOC/src/DOCFile.cpp b/Types/DOC/src/DOCFile.cpp index 42df0a29..98fe80ba 100644 --- a/Types/DOC/src/DOCFile.cpp +++ b/Types/DOC/src/DOCFile.cpp @@ -1,6 +1,4 @@ #include "doc.hpp" - -#include // TODO: remove #include namespace GView::Type::DOC @@ -572,7 +570,7 @@ bool DOCFile::ParseVBAProject() } // get the sector data - size_t byteOffset = sectorSize * (sect + 1); + size_t byteOffset = sectorSize * (sect + 1ULL); BufferView sector(vbaProjectBuffer.GetData() + byteOffset, sectorSize); FAT.Add(sector); } @@ -648,7 +646,7 @@ bool DOCFile::PopulateItem(AppCUI::Controls::TreeViewItem item) // TODO: add the creation time and modified time of the module stream - item.SetText(2, String().Format("%u", decompressed.GetLength())); + item.SetText(2, String().Format("%llu", decompressed.GetLength())); item.SetText(3, moduleRecord.docString); From 9da37ce0cfeed10fc4eeb1d5abd128837fba3093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gheorghi=C8=9B=C4=83=20Mutu?= Date: Tue, 30 Jul 2024 21:35:42 +0300 Subject: [PATCH 20/20] [EML] ^ AppCUI update --- AppCUI | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/AppCUI b/AppCUI index 13fdd6e9..fb683857 160000 --- a/AppCUI +++ b/AppCUI @@ -1 +1 @@ -Subproject commit 13fdd6e9f422e9f1f3e679c051d85d0ca646971b +Subproject commit fb6838578a749adaf10e7a8ad65d6c8cca308692