Skip to content

Commit

Permalink
[OfficeExtractor] Do not add word/line count if nothing has been extracted
Browse files Browse the repository at this point in the history

Otherwise, without installed helpers an incorrect count of 0 would be
recorded.
  • Loading branch information
StefanBruens committed Aug 10, 2024
1 parent a2666fa commit 2165489
Showing 1 changed file with 16 additions and 19 deletions.
35 changes: 16 additions & 19 deletions src/extractors/officeextractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,6 @@ QStringList OfficeExtractor::mimetypes() const
void OfficeExtractor::extract(ExtractionResult* result)
{
QStringList args;
QString contents;

args << QStringLiteral("-s") << QStringLiteral("cp1252"); // FIXME: Store somewhere a map between the user's language and the encoding of the Windows files it may use ?
args << QStringLiteral("-d") << QStringLiteral("utf8");
Expand All @@ -62,15 +61,16 @@ void OfficeExtractor::extract(ExtractionResult* result)
}

args << QStringLiteral("-w");
contents = textFromFile(fileUrl, m_catdoc, args);

// Now that we have the plain text content, count words, lines and characters
// (original code from plaintextextractor.cpp, authored by Vishesh Handa)
int lines = contents.count(QLatin1Char('\n'));
int words = contents.count(QRegularExpression(QStringLiteral("\\b\\w+\\b"), QRegularExpression::UseUnicodePropertiesOption));

result->add(Property::WordCount, words);
result->add(Property::LineCount, lines);
if (const auto contents = textFromFile(fileUrl, m_catdoc, args); !contents.isEmpty()) {
// Now that we have the plain text content, count words, lines and characters
// (original code from plaintextextractor.cpp, authored by Vishesh Handa)
int lines = contents.count(QLatin1Char('\n'));
int words = contents.count(QRegularExpression(QStringLiteral("\\b\\w+\\b"), QRegularExpression::UseUnicodePropertiesOption));

result->add(Property::WordCount, words);
result->add(Property::LineCount, lines);
result->append(contents);
}
} else if (mimeType == QLatin1String("application/vnd.ms-excel")) {
result->addType(Type::Document);
result->addType(Type::Spreadsheet);
Expand All @@ -82,24 +82,21 @@ void OfficeExtractor::extract(ExtractionResult* result)
args << QStringLiteral("-c") << QStringLiteral(" ");
args << QStringLiteral("-b") << QStringLiteral(" ");
args << QStringLiteral("-q") << QStringLiteral("0");
contents = textFromFile(fileUrl, m_xls2csv, args);
if (const auto contents = textFromFile(fileUrl, m_xls2csv, args); !contents.isEmpty()) {
result->append(contents);
}
} else if (mimeType == QLatin1String("application/vnd.ms-powerpoint")) {
result->addType(Type::Document);
result->addType(Type::Presentation);

if (!extractPlainText) {
return;
}
contents = textFromFile(fileUrl, m_catppt, args);
}

if (contents.isEmpty()) {
return;
if (const auto contents = textFromFile(fileUrl, m_catppt, args); !contents.isEmpty()) {
result->append(contents);
}
}

result->append(contents);

return;
}

QString OfficeExtractor::textFromFile(const QString& fileUrl, const QString& command, QStringList& arguments)
Expand Down

0 comments on commit 2165489

Please sign in to comment.