Skip to content

Commit

Permalink
[OfficeExtractor] Only try to extract content if PlainText is requested
Browse files Browse the repository at this point in the history
  • Loading branch information
StefanBruens committed Aug 10, 2024
1 parent 30a4315 commit a2666fa
Showing 1 changed file with 13 additions and 0 deletions.
13 changes: 13 additions & 0 deletions src/extractors/officeextractor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,17 @@ void OfficeExtractor::extract(ExtractionResult* result)
args << QStringLiteral("-s") << QStringLiteral("cp1252"); // FIXME: the source encoding is hard-coded to cp1252; store a map somewhere between the user's language and the encoding its Windows files are likely to use.
args << QStringLiteral("-d") << QStringLiteral("utf8");

const bool extractPlainText = result->inputFlags() & ExtractionResult::ExtractPlainText;

const QString fileUrl = result->inputUrl();
const QString mimeType = result->inputMimetype();
if (mimeType == QLatin1String("application/msword")) {
result->addType(Type::Document);

if (!extractPlainText) {
return;
}

args << QStringLiteral("-w");
contents = textFromFile(fileUrl, m_catdoc, args);

Expand All @@ -69,6 +75,10 @@ void OfficeExtractor::extract(ExtractionResult* result)
result->addType(Type::Document);
result->addType(Type::Spreadsheet);

if (!extractPlainText) {
return;
}

args << QStringLiteral("-c") << QStringLiteral(" ");
args << QStringLiteral("-b") << QStringLiteral(" ");
args << QStringLiteral("-q") << QStringLiteral("0");
Expand All @@ -77,6 +87,9 @@ void OfficeExtractor::extract(ExtractionResult* result)
result->addType(Type::Document);
result->addType(Type::Presentation);

if (!extractPlainText) {
return;
}
contents = textFromFile(fileUrl, m_catppt, args);
}

Expand Down

0 comments on commit a2666fa

Please sign in to comment.