Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify times per parser control #1959

Merged
merged 9 commits into from
Oct 29, 2023
26 changes: 12 additions & 14 deletions iped-app/src/main/java/iped/app/processing/ui/ProgressFrame.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,7 @@
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicLong;
import java.util.TreeMap;

import javax.swing.BorderFactory;
import javax.swing.BoxLayout;
Expand Down Expand Up @@ -93,6 +92,7 @@ public class ProgressFrame extends JFrame implements PropertyChangeListener, Act
private boolean paused = false;
private String decodingDir = null;
private long physicalMemory;
private static final Map<String, Long> timesPerParser = new TreeMap<String, Long>();

private static class RestrictedSizeLabel extends JLabel {

Expand Down Expand Up @@ -360,7 +360,7 @@ private String getTaskTimes() {
if (task.isEnabled()) {
long time = taskTimes[i];
long sec = time / (1000000 * workers.length);
int pct = (int) ((100 * time) / totalTime);
int pct = (int) ((100 * time + totalTime / 2) / totalTime); // Round percentage

startRow(msg, task.getName(), pct);
addCell(msg, nf.format(sec) + "s", Align.RIGHT);
Expand All @@ -377,28 +377,26 @@ private String getTaskTimes() {
}

private String getParserTimes() {
if (ParsingTask.times.isEmpty())
ParsingTask.copyTimesPerParser(timesPerParser);
if (timesPerParser.isEmpty())
return "";
StringBuilder msg = new StringBuilder();
startTable(msg);
addTitle(msg, 3, Messages.getString("ProgressFrame.ParserTimes"));

long totalTime = 0;
for (Worker worker : workers)
for (AbstractTask task : worker.tasks)
if (task.getClass().equals(ParsingTask.class))
totalTime += task.getTaskTime();
for (long parserTime : timesPerParser.values()) {
totalTime += parserTime;
}
if (totalTime < 1)
totalTime = 1;

for (Object o : ParsingTask.times.entrySet().toArray()) {
@SuppressWarnings("unchecked")
Entry<String, AtomicLong> e = (Entry<String, AtomicLong>) o;
long time = e.getValue().get();
for (String parserName : timesPerParser.keySet()) {
long time = timesPerParser.get(parserName);
long sec = time / (1000000 * workers.length);
int pct = (int) ((100 * time) / totalTime);
int pct = (int) ((100 * time + totalTime / 2) / totalTime); // Round percentage

startRow(msg, e.getKey(), pct);
startRow(msg, parserName, pct);
addCell(msg, nf.format(sec) + "s", Align.RIGHT);
finishRow(msg, pct + "%", Align.RIGHT);
}
Expand Down
60 changes: 56 additions & 4 deletions iped-engine/src/main/java/iped/engine/core/Statistics.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import java.lang.management.RuntimeMXBean;
import java.util.Date;
import java.util.HashMap;
import java.util.TreeMap;
import java.util.concurrent.atomic.AtomicInteger;

import javax.swing.JOptionPane;
Expand All @@ -34,6 +35,7 @@
import iped.engine.localization.Messages;
import iped.engine.lucene.ConfiguredFSDirectory;
import iped.engine.task.ExportFileTask;
import iped.engine.task.ParsingTask;
import iped.engine.task.carver.BaseCarveTask;
import iped.engine.task.index.IndexItem;
import iped.engine.util.Util;
Expand Down Expand Up @@ -233,23 +235,73 @@ public void logStatistics(Manager manager) throws Exception {
int carvedIgnored = getCorruptCarveIgnored();
int ignored = getIgnored();

// Processing times per task
long totalTime = 0;
Worker[] workers = manager.getWorkers();
long[] taskTimes = new long[workers[0].tasks.size()];
for (Worker worker : workers) {
for (int i = 0; i < taskTimes.length; i++) {
taskTimes[i] += worker.tasks.get(i).getTaskTime();
totalTime += worker.tasks.get(i).getTaskTime();
long t = worker.tasks.get(i).getTaskTime();
taskTimes[i] += t;
totalTime += t;
}
}
LocalConfig localConfig = ConfigurationManager.get().findObject(LocalConfig.class);
totalTime = totalTime / (1000000 * localConfig.getNumThreads());
LOGGER.info("Processing Times per Task:");
StringBuilder sb = new StringBuilder();
sb.append(String.format("%-30s", "TASK"));
sb.append(String.format(" %7s", "TIME(s)"));
sb.append(String.format(" %6s", "PCT(%)"));
LOGGER.info(sb.toString());
sb.setLength(0);
sb.append(String.format("%-30s", "").replace(' ', '='));
sb.append(" ").append(String.format("%7s", "").replace(' ', '='));
sb.append(" ").append(String.format("%6s", "").replace(' ', '='));
LOGGER.info(sb.toString());
sb.setLength(0);
for (int i = 0; i < taskTimes.length; i++) {
long sec = taskTimes[i] / (1000000 * localConfig.getNumThreads());
LOGGER.info(workers[0].tasks.get(i).getName() + ":\tProcessing Time:\t" + sec + "s (" //$NON-NLS-1$ //$NON-NLS-2$
+ Math.round((100f * sec) / totalTime) + "%)"); //$NON-NLS-1$
sb.append(String.format("%-30s", workers[0].tasks.get(i).getName()));
sb.append(String.format(" %7d", sec));
sb.append(String.format(" %6d", Math.round((100f * sec) / totalTime)));
LOGGER.info(sb.toString());
sb.setLength(0);
}

// Processing times per parser
TreeMap<String, Long> timesPerParser = new TreeMap<String, Long>();
ParsingTask.copyTimesPerParser(timesPerParser);
if (!timesPerParser.isEmpty()) {
totalTime = 0;
for (long parserTime : timesPerParser.values()) {
totalTime += parserTime;
}
if (totalTime < 1)
totalTime = 1;
sb = new StringBuilder();
LOGGER.info("Processing Times per Parser:");
sb.append(String.format("%-30s", "PARSER"));
sb.append(String.format(" %7s", "TIME(s)"));
sb.append(String.format(" %6s", "PCT(%)"));
LOGGER.info(sb.toString());
sb.setLength(0);
sb.append(String.format("%-30s", "").replace(' ', '='));
sb.append(" ").append(String.format("%7s", "").replace(' ', '='));
sb.append(" ").append(String.format("%6s", "").replace(' ', '='));
LOGGER.info(sb.toString());
sb.setLength(0);
for (String parserName : timesPerParser.keySet()) {
long time = timesPerParser.get(parserName);
long sec = time / (1000000 * workers.length);
sb.append(String.format("%-30s", parserName));
sb.append(String.format(" %7d", sec));
sb.append(String.format(" %6d", Math.round(100.0 * time / totalTime)));
LOGGER.info(sb.toString());
sb.setLength(0);
}
}

int numDocs;
try (IndexReader reader = DirectoryReader.open(ConfiguredFSDirectory.open(indexDir))) {
numDocs = reader.numDocs();
Expand Down
63 changes: 30 additions & 33 deletions iped-engine/src/main/java/iped/engine/task/ParsingTask.java
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* Copyright 2012-2014, Luis Filipe da Cruz Nassif
*
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
* This file is part of Indexador e Processador de Evidências Digitais (IPED).
*
* IPED is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
Expand All @@ -23,13 +23,11 @@
import java.io.InputStream;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
Expand Down Expand Up @@ -155,7 +153,7 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private static int max_expanding_containers;

public static AtomicLong totalText = new AtomicLong();
public static Map<String, AtomicLong> times = Collections.synchronizedMap(new TreeMap<String, AtomicLong>());
private static final Map<String, Long> timesPerParser = new HashMap<String, Long>();

private static Map<Integer, ZipBombStats> zipBombStatsMap = new ConcurrentHashMap<>();
private static final Set<MediaType> typesToCheckZipBomb = getTypesToCheckZipbomb();
Expand All @@ -170,11 +168,11 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private boolean extractEmbedded;
private volatile ParsingReader reader;
private String firstParentPath = null;
private Map<Integer, Long> timeInDepth = new ConcurrentHashMap<>();
private volatile int depth = 0;
private volatile long subitemsTime;
private Map<Object, ParentInfo> idToItemMap = new HashMap<>();
private int numSubitems = 0;
private StandardParser autoParser;
private long minItemSizeToFragment;

private static Set<MediaType> getTypesToCheckZipbomb() {
HashSet<MediaType> set = new HashSet<>();
Expand Down Expand Up @@ -307,34 +305,27 @@ public void process(IItem evidence) throws Exception {
return;
}

String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
AtomicLong time = times.get(parserName);
if (time == null) {
time = new AtomicLong();
times.put(parserName, time);
}

SplitLargeBinaryConfig splitConfig = ConfigurationManager.get()
.findObject(SplitLargeBinaryConfig.class);
if (((Item) evidence).getTextCache() == null
&& ((evidence.getLength() == null || evidence.getLength() < splitConfig.getMinItemSizeToFragment())
|| StandardParser.isSpecificParser(parser))) {
&& ((evidence.getLength() == null || evidence.getLength() < minItemSizeToFragment)
|| StandardParser.isSpecificParser(parser))) {
ParsingTask task = null;
try {
depth++;
ParsingTask task = new ParsingTask(worker, autoParser);
task = new ParsingTask(worker, autoParser);
task.parsingConfig = this.parsingConfig;
task.expandConfig = this.expandConfig;
task.depth = depth;
task.timeInDepth = timeInDepth;
task.safeProcess(evidence);

} finally {
depth--;
String parserName = getParserName(parser, evidence.getMetadata().get(Metadata.CONTENT_TYPE));
long st = task == null ? 0 : task.subitemsTime;
long diff = System.nanoTime() / 1000 - start;
Long subitemsTime = timeInDepth.remove(depth + 1);
if (subitemsTime == null)
subitemsTime = 0L;
time.addAndGet(diff - subitemsTime);
if (diff < st) {
LOGGER.warn("{} Negative Parsing Time: {} {} Diff={} SubItemsTime={}",
Thread.currentThread().getName(), evidence.getPath(), parserName, diff, st);
}
synchronized (timesPerParser) {
timesPerParser.merge(parserName, diff - st, Long::sum);
}
}

}
Expand Down Expand Up @@ -694,22 +685,19 @@ public void parseEmbedded(InputStream inputStream, ContentHandler handler, Metad

// pausa contagem de timeout do pai antes de extrair e processar subitem
if (reader.setTimeoutPaused(true)) {
long start = System.nanoTime() / 1000;
try {
long start = System.nanoTime() / 1000;

ProcessTime time = ProcessTime.AUTO;

worker.processNewItem(subItem, time);
Statistics.get().incSubitemsDiscovered();
numSubitems++;

long diff = (System.nanoTime() / 1000) - start;
Long prevTime = timeInDepth.get(depth);
if (prevTime == null)
prevTime = 0L;
timeInDepth.put(depth, prevTime + diff);

} finally {
// Store time spent on subitems processing
subitemsTime += System.nanoTime() / 1000 - start;

// despausa contador de timeout do pai somente após processar subitem
reader.setTimeoutPaused(false);

Expand Down Expand Up @@ -788,6 +776,9 @@ public void init(ConfigurationManager configurationManager) {
parsingConfig = configurationManager.findObject(ParsingTaskConfig.class);
expandConfig = configurationManager.findObject(CategoryToExpandConfig.class);

SplitLargeBinaryConfig splitConfig = configurationManager.findObject(SplitLargeBinaryConfig.class);
minItemSizeToFragment = splitConfig.getMinItemSizeToFragment();

setupParsingOptions(configurationManager);

this.autoParser = new StandardParser();
Expand Down Expand Up @@ -875,4 +866,10 @@ public void finish() throws Exception {
totalText = null;
}

public static void copyTimesPerParser(Map<String,Long> dest) {
dest.clear();
synchronized (timesPerParser) {
dest.putAll(timesPerParser);
}
}
}
Loading