Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into #1953_APKParser
Browse files Browse the repository at this point in the history
  • Loading branch information
wladimirleite committed Oct 30, 2023
2 parents ee2d51d + 8fb6cd2 commit 8911e11
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 56 deletions.
26 changes: 12 additions & 14 deletions iped-app/src/main/java/iped/app/processing/ui/ProgressFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicLong;
import java.util.TreeMap;

import javax.swing.BorderFactory;
import javax.swing.BoxLayout;
Expand Down Expand Up @@ -93,6 +92,7 @@ public class ProgressFrame extends JFrame implements PropertyChangeListener, Act
private boolean paused = false;
private String decodingDir = null;
private long physicalMemory;
private static final Map<String, Long> timesPerParser = new TreeMap<String, Long>();

private static class RestrictedSizeLabel extends JLabel {

Expand Down Expand Up @@ -360,7 +360,7 @@ private String getTaskTimes() {
if (task.isEnabled()) {
long time = taskTimes[i];
long sec = time / (1000000 * workers.length);
int pct = (int) ((100 * time) / totalTime);
int pct = (int) ((100 * time + totalTime / 2) / totalTime); // Round percentage

startRow(msg, task.getName(), pct);
addCell(msg, nf.format(sec) + "s", Align.RIGHT);
Expand All @@ -377,28 +377,26 @@ private String getTaskTimes() {
}

private String getParserTimes() {
if (ParsingTask.times.isEmpty())
ParsingTask.copyTimesPerParser(timesPerParser);
if (timesPerParser.isEmpty())
return "";
StringBuilder msg = new StringBuilder();
startTable(msg);
addTitle(msg, 3, Messages.getString("ProgressFrame.ParserTimes"));

long totalTime = 0;
for (Worker worker : workers)
for (AbstractTask task : worker.tasks)
if (task.getClass().equals(ParsingTask.class))
totalTime += task.getTaskTime();
for (long parserTime : timesPerParser.values()) {
totalTime += parserTime;
}
if (totalTime < 1)
totalTime = 1;

for (Object o : ParsingTask.times.entrySet().toArray()) {
@SuppressWarnings("unchecked")
Entry<String, AtomicLong> e = (Entry<String, AtomicLong>) o;
long time = e.getValue().get();
for (String parserName : timesPerParser.keySet()) {
long time = timesPerParser.get(parserName);
long sec = time / (1000000 * workers.length);
int pct = (int) ((100 * time) / totalTime);
int pct = (int) ((100 * time + totalTime / 2) / totalTime); // Round percentage

startRow(msg, e.getKey(), pct);
startRow(msg, parserName, pct);
addCell(msg, nf.format(sec) + "s", Align.RIGHT);
finishRow(msg, pct + "%", Align.RIGHT);
}
Expand Down
11 changes: 11 additions & 0 deletions iped-engine/src/main/java/iped/engine/core/Manager.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Method;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
Expand Down Expand Up @@ -158,6 +159,16 @@ public class Manager {
private static final String appWinExeFileName = "IPED-SearchApp.exe";

static {

// installs the AmazonCorrettoCryptoProvider if it is available
try {
Class<?> clazz = Class.forName("com.amazon.corretto.crypto.provider.AmazonCorrettoCryptoProvider");
Method method = clazz.getMethod("install");
method.invoke(null);
} catch (Exception e) {
LOGGER.debug("AmazonCorrettoCryptoProvider not installed", e);
}

if (Security.getProvider(BouncyCastleProvider.PROVIDER_NAME) == null) {
Security.addProvider(new BouncyCastleProvider());
}
Expand Down
60 changes: 56 additions & 4 deletions iped-engine/src/main/java/iped/engine/core/Statistics.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.lang.management.RuntimeMXBean;
import java.util.Date;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;

import javax.swing.JOptionPane;
Expand All @@ -34,6 +35,7 @@
import iped.engine.localization.Messages;
import iped.engine.lucene.ConfiguredFSDirectory;
import iped.engine.task.ExportFileTask;
import iped.engine.task.ParsingTask;
import iped.engine.task.carver.BaseCarveTask;
import iped.engine.task.index.IndexItem;
import iped.engine.util.Util;
Expand Down Expand Up @@ -233,23 +235,73 @@ public void logStatistics(Manager manager) throws Exception {
int carvedIgnored = getCorruptCarveIgnored();
int ignored = getIgnored();

// Processing times per task
long totalTime = 0;
Worker[] workers = manager.getWorkers();
long[] taskTimes = new long[workers[0].tasks.size()];
for (Worker worker : workers) {
for (int i = 0; i < taskTimes.length; i++) {
taskTimes[i] += worker.tasks.get(i).getTaskTime();
totalTime += worker.tasks.get(i).getTaskTime();
long t = worker.tasks.get(i).getTaskTime();
taskTimes[i] += t;
totalTime += t;
}
}
LocalConfig localConfig = ConfigurationManager.get().findObject(LocalConfig.class);
totalTime = totalTime / (1000000 * localConfig.getNumThreads());
LOGGER.info("Processing Times per Task:");
StringBuilder sb = new StringBuilder();
sb.append(String.format("%-30s", "TASK"));
sb.append(String.format(" %7s", "TIME(s)"));
sb.append(String.format(" %6s", "PCT(%)"));
LOGGER.info(sb.toString());
sb.setLength(0);
sb.append(String.format("%-30s", "").replace(' ', '='));
sb.append(" ").append(String.format("%7s", "").replace(' ', '='));
sb.append(" ").append(String.format("%6s", "").replace(' ', '='));
LOGGER.info(sb.toString());
sb.setLength(0);
for (int i = 0; i < taskTimes.length; i++) {
long sec = taskTimes[i] / (1000000 * localConfig.getNumThreads());
LOGGER.info(workers[0].tasks.get(i).getName() + ":\tProcessing Time:\t" + sec + "s (" //$NON-NLS-1$ //$NON-NLS-2$
+ Math.round((100f * sec) / totalTime) + "%)"); //$NON-NLS-1$
sb.append(String.format("%-30s", workers[0].tasks.get(i).getName()));
sb.append(String.format(" %7d", sec));
sb.append(String.format(" %6d", Math.round((100f * sec) / totalTime)));
LOGGER.info(sb.toString());
sb.setLength(0);
}

// Processing times per parser
TreeMap<String, Long> timesPerParser = new TreeMap<String, Long>();
ParsingTask.copyTimesPerParser(timesPerParser);
if (!timesPerParser.isEmpty()) {
totalTime = 0;
for (long parserTime : timesPerParser.values()) {
totalTime += parserTime;
}
if (totalTime < 1)
totalTime = 1;
sb = new StringBuilder();
LOGGER.info("Processing Times per Parser:");
sb.append(String.format("%-30s", "PARSER"));
sb.append(String.format(" %7s", "TIME(s)"));
sb.append(String.format(" %6s", "PCT(%)"));
LOGGER.info(sb.toString());
sb.setLength(0);
sb.append(String.format("%-30s", "").replace(' ', '='));
sb.append(" ").append(String.format("%7s", "").replace(' ', '='));
sb.append(" ").append(String.format("%6s", "").replace(' ', '='));
LOGGER.info(sb.toString());
sb.setLength(0);
for (String parserName : timesPerParser.keySet()) {
long time = timesPerParser.get(parserName);
long sec = time / (1000000 * workers.length);
sb.append(String.format("%-30s", parserName));
sb.append(String.format(" %7d", sec));
sb.append(String.format(" %6d", Math.round(100.0 * time / totalTime)));
LOGGER.info(sb.toString());
sb.setLength(0);
}
}

int numDocs;
try (IndexReader reader = DirectoryReader.open(ConfiguredFSDirectory.open(indexDir))) {
numDocs = reader.numDocs();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,9 @@ public SeekableInputStream getSeekableInputStream(String path) throws IOExceptio
}
synchronized (filesCache) {
tmp = filesCache.get(path);
}
if (tmp != null) {
return new SeekableFileInputStream(tmp.toFile());
if (tmp != null) {
return new SeekableFileInputStream(tmp.toFile());
}
}

ZipArchiveEntry zae;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ public void init(ConfigurationManager configurationManager) throws Exception {
Class.forName("edu.stanford.nlp.ie.crf.CRFClassifier"); //$NON-NLS-1$

} catch (ClassNotFoundException e) {
LOGGER.error("StanfordCoreNLP not found. Did you put the jar in the optional lib folder?");
LOGGER.error("StanfordCoreNLP not found. Did you put the jar in 'plugins' folder?");
nerConfig.setEnabled(false);
return;
}
Expand All @@ -86,7 +86,7 @@ public void init(ConfigurationManager configurationManager) throws Exception {

URL modelResource = this.getClass().getResource("/" + modelPath); //$NON-NLS-1$
if (modelResource == null) {
LOGGER.error(modelPath + " not found. Did you put the model in the optional lib folder?");
LOGGER.error(modelPath + " not found. Did you put the model in 'plugins' folder?");
nerConfig.setEnabled(false);
return;
}
Expand Down
63 changes: 30 additions & 33 deletions iped-engine/src/main/java/iped/engine/task/ParsingTask.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright 2012-2014, Luis Filipe da Cruz Nassif
*
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
*
* IPED is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -23,13 +23,11 @@
import java.io.InputStream;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -155,7 +153,7 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private static int max_expanding_containers;

public static AtomicLong totalText = new AtomicLong();
public static Map<String, AtomicLong> times = Collections.synchronizedMap(new TreeMap<String, AtomicLong>());
private static final Map<String, Long> timesPerParser = new HashMap<String, Long>();

private static Map<Integer, ZipBombStats> zipBombStatsMap = new ConcurrentHashMap<>();
private static final Set<MediaType> typesToCheckZipBomb = getTypesToCheckZipbomb();
Expand All @@ -170,11 +168,11 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private boolean extractEmbedded;
private volatile ParsingReader reader;
private String firstParentPath = null;
private Map<Integer, Long> timeInDepth = new ConcurrentHashMap<>();
private volatile int depth = 0;
private volatile long subitemsTime;
private Map<Object, ParentInfo> idToItemMap = new HashMap<>();
private int numSubitems = 0;
private StandardParser autoParser;
private long minItemSizeToFragment;

private static Set<MediaType> getTypesToCheckZipbomb() {
HashSet<MediaType> set = new HashSet<>();
Expand Down Expand Up @@ -307,34 +305,27 @@ public void process(IItem evidence) throws Exception {
return;
}

String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
AtomicLong time = times.get(parserName);
if (time == null) {
time = new AtomicLong();
times.put(parserName, time);
}

SplitLargeBinaryConfig splitConfig = ConfigurationManager.get()
.findObject(SplitLargeBinaryConfig.class);
if (((Item) evidence).getTextCache() == null
&& ((evidence.getLength() == null || evidence.getLength() < splitConfig.getMinItemSizeToFragment())
|| StandardParser.isSpecificParser(parser))) {
&& ((evidence.getLength() == null || evidence.getLength() < minItemSizeToFragment)
|| StandardParser.isSpecificParser(parser))) {
ParsingTask task = null;
try {
depth++;
ParsingTask task = new ParsingTask(worker, autoParser);
task = new ParsingTask(worker, autoParser);
task.parsingConfig = this.parsingConfig;
task.expandConfig = this.expandConfig;
task.depth = depth;
task.timeInDepth = timeInDepth;
task.safeProcess(evidence);

} finally {
depth--;
String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
long st = task == null ? 0 : task.subitemsTime;
long diff = System.nanoTime() / 1000 - start;
Long subitemsTime = timeInDepth.remove(depth + 1);
if (subitemsTime == null)
subitemsTime = 0L;
time.addAndGet(diff - subitemsTime);
if (diff < st) {
LOGGER.warn("{} Negative Parsing Time: {} {} Diff={} SubItemsTime={}",
Thread.currentThread().getName(), evidence.getPath(), parserName, diff, st);
}
synchronized (timesPerParser) {
timesPerParser.merge(parserName, diff - st, Long::sum);
}
}

}
Expand Down Expand Up @@ -694,22 +685,19 @@ public void parseEmbedded(InputStream inputStream, ContentHandler handler, Metad

// pausa contagem de timeout do pai antes de extrair e processar subitem
if (reader.setTimeoutPaused(true)) {
long start = System.nanoTime() / 1000;
try {
long start = System.nanoTime() / 1000;

ProcessTime time = ProcessTime.AUTO;

worker.processNewItem(subItem, time);
Statistics.get().incSubitemsDiscovered();
numSubitems++;

long diff = (System.nanoTime() / 1000) - start;
Long prevTime = timeInDepth.get(depth);
if (prevTime == null)
prevTime = 0L;
timeInDepth.put(depth, prevTime + diff);

} finally {
// Store time spent on subitems processing
subitemsTime += System.nanoTime() / 1000 - start;

// despausa contador de timeout do pai somente após processar subitem
reader.setTimeoutPaused(false);

Expand Down Expand Up @@ -788,6 +776,9 @@ public void init(ConfigurationManager configurationManager) {
parsingConfig = configurationManager.findObject(ParsingTaskConfig.class);
expandConfig = configurationManager.findObject(CategoryToExpandConfig.class);

SplitLargeBinaryConfig splitConfig = configurationManager.findObject(SplitLargeBinaryConfig.class);
minItemSizeToFragment = splitConfig.getMinItemSizeToFragment();

setupParsingOptions(configurationManager);

this.autoParser = new StandardParser();
Expand Down Expand Up @@ -875,4 +866,10 @@ public void finish() throws Exception {
totalText = null;
}

public static void copyTimesPerParser(Map<String,Long> dest) {
dest.clear();
synchronized (timesPerParser) {
dest.putAll(timesPerParser);
}
}
}

0 comments on commit 8911e11

Please sign in to comment.