Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into #1953_APKParser
Browse files Browse the repository at this point in the history
  • Loading branch information
wladimirleite committed Nov 13, 2023
2 parents 8476ce9 + 34d1721 commit 12320a4
Show file tree
Hide file tree
Showing 25 changed files with 317 additions and 96 deletions.
6 changes: 1 addition & 5 deletions iped-app/src/main/java/iped/app/ui/ExternalFileOpen.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,7 @@ public void run() {
try {
if (IOUtil.isToOpenExternally(item.getName(), item.getType())) {
LOGGER.info("Externally Opening file " + item.getPath()); //$NON-NLS-1$
File file = Util.getFileRenamedToExt(item.getTempFile(), item.getType());
file.setReadOnly();
if (IOUtil.isTemporaryFile(file)) {
file.deleteOnExit();
}
File file = Util.getFileWithRightExt(item);
open(file);
}

Expand Down
18 changes: 16 additions & 2 deletions iped-engine/src/main/java/iped/engine/data/IPEDSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,8 @@ public class IPEDSource implements IIPEDSource {

boolean isReport = false;

boolean askImagePathIfNotFound = true;

public static boolean checkIfIsCaseFolder(File dir) {
File module = new File(dir, MODULE_DIR);
if (new File(module, INDEX_DIR).exists() && new File(module, LIB_DIR).exists() && new File(module, DATA_DIR).exists()) {
Expand All @@ -155,7 +157,11 @@ public IPEDSource(File casePath) {
}

/**
 * Convenience constructor: delegates to
 * {@code IPEDSource(File, IndexWriter, boolean)} passing {@code true} for
 * {@code askImagePathIfNotFound}.
 *
 * @param casePath
 *            forwarded unchanged as the first argument
 * @param iw
 *            forwarded unchanged as the second argument
 */
public IPEDSource(File casePath, IndexWriter iw) {
this(casePath, iw, true);
}

public IPEDSource(File casePath, IndexWriter iw, boolean askImagePathIfNotFound) {
this.askImagePathIfNotFound = askImagePathIfNotFound;
this.casePath = casePath;
moduleDir = new File(casePath, MODULE_DIR);
index = new File(moduleDir, INDEX_DIR);
Expand Down Expand Up @@ -239,6 +245,9 @@ public IPEDSource(File casePath, IndexWriter iw) {
multiBookmarks = new MultiBookmarks(Collections.singletonList(this));

} catch (Exception e) {
if (e instanceof RuntimeException) {
throw (RuntimeException) e;
}
throw new RuntimeException(e.getMessage(), e);
}
}
Expand Down Expand Up @@ -601,8 +610,13 @@ private void updateImagePathsToAbsolute(File casePath, File sleuthFile) throws E
if (newPaths.size() > 0) {
testCanWriteToCase(sleuthFile);
sleuthCase.setImagePaths(id, newPaths);
} else if (iw == null)
askNewImagePath(id, paths, sleuthFile);
} else if (iw == null) {
if (askImagePathIfNotFound) {
askNewImagePath(id, paths, sleuthFile);
} else {
throw new RuntimeException("Image not found: " + paths.get(0));
}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public static FSDirectory open(File indexDir) throws IOException {
IndexTaskConfig config = ConfigurationManager.get().findObject(IndexTaskConfig.class);

FSDirectory result;
if (config.isUseNIOFSDirectory()) {
if (config != null && config.isUseNIOFSDirectory()) {
result = new NIOFSDirectory(indexDir.toPath());
} else {
result = FSDirectory.open(indexDir.toPath());
Expand Down
18 changes: 16 additions & 2 deletions iped-engine/src/main/java/iped/engine/task/ParsingTask.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,12 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.compress.archivers.ArchiveStreamFactory;
import org.apache.tika.exception.TikaException;
import org.apache.tika.extractor.EmbeddedDocumentExtractor;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
Expand All @@ -45,6 +47,7 @@
import org.apache.tika.parser.Parser;
import org.apache.tika.parser.html.HtmlMapper;
import org.apache.tika.parser.html.IdentityHtmlMapper;
import org.apache.tika.utils.XMLReaderUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
Expand Down Expand Up @@ -159,6 +162,7 @@ public class ParsingTask extends ThumbTask implements EmbeddedDocumentExtractor
private static final Set<MediaType> typesToCheckZipBomb = getTypesToCheckZipbomb();

private static AtomicInteger containersBeingExpanded = new AtomicInteger();
private static AtomicBoolean tikaSAXPoolSizeSet = new AtomicBoolean(false);

private CategoryToExpandConfig expandConfig;
private ParsingTaskConfig parsingConfig;
Expand Down Expand Up @@ -237,8 +241,6 @@ private ParseContext getTikaContext(File output, IPEDSource ipedsource) {
context.set(ArchiveStreamFactory.class, new ArchiveStreamFactory("Cp850")); //$NON-NLS-1$
// Indexa conteudo de todos os elementos de HTMLs, como script, etc
context.set(HtmlMapper.class, IdentityHtmlMapper.INSTANCE);
// we have seen very large records in valid docs
org.apache.poi.hpsf.CodePageString.setMaxRecordLength(512_000);

context.set(IStreamSource.class, evidence);
context.set(IItemReader.class, evidence);
Expand Down Expand Up @@ -791,6 +793,18 @@ public static void setupParsingOptions(ConfigurationManager configurationManager
ParsersConfig parserConfig = configurationManager.findObject(ParsersConfig.class);
System.setProperty("tika.config", parserConfig.getTmpConfigFile().getAbsolutePath());

// we have seen very large records in valid docs
org.apache.poi.hpsf.CodePageString.setMaxRecordLength(512_000);

// heavy Tika configuration
if (!tikaSAXPoolSizeSet.getAndSet(true)) {
try {
XMLReaderUtils.setPoolSize(Runtime.getRuntime().availableProcessors());
} catch (TikaException e) {
e.printStackTrace();
}
}

// most options below are set using sys props because they are also used by
// child external processes

Expand Down
161 changes: 161 additions & 0 deletions iped-engine/src/main/java/iped/engine/util/IPEDCrawler.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
package iped.engine.util;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import iped.data.IItem;
import iped.engine.data.IPEDSource;
import iped.engine.search.IPEDSearcher;

/**
 * Command line tool that walks a folder tree looking for IPED cases, runs the
 * same search query against every case found and copies the matching items
 * into an export folder, using one {@code case_N} sub-folder per case.
 *
 * <p>
 * Usage: {@code IPEDCrawler input_folder export_folder search_query}
 *
 * <p>
 * A background thread discovers cases and queues them; the main thread drains
 * the queue and hands each case to a fixed-size thread pool. The first case
 * that opens successfully is opened once on the main thread before any worker
 * is started.
 */
public class IPEDCrawler {

    private static final boolean SKIP_KNOWN_FOLDERS = true;

    /** How long the dispatcher sleeps when no case has been queued yet. */
    private static final long IDLE_POLL_MILLIS = 100;

    /** Guards the "find a free file name and create it" sequence. */
    private static final Object TARGET_LOCK = new Object();

    private static final ConcurrentLinkedQueue<File> cases = new ConcurrentLinkedQueue<>();
    private static final AtomicInteger numCases = new AtomicInteger();

    /**
     * Entry point. Validates the three arguments, starts the folder scan and
     * dispatches every discovered case to the worker pool, then waits for all
     * workers and exits the JVM.
     *
     * @param args
     *            {@code input_folder export_folder search_query}
     */
    public static void main(String[] args) {

        if (args.length != 3) {
            System.err.println("Please provide exactly 3 parameters: input_folder export_folder search_query");
            System.exit(1);
        }

        File folderToScan = new File(args[0]);
        File exportFolder = new File(args[1]);
        String query = args[2];

        if (!folderToScan.exists() || !folderToScan.isDirectory()) {
            System.err.println("Input cases folder doesn't exist or is not a directory!");
            System.exit(2);
        }

        exportFolder.mkdirs();

        if (!exportFolder.exists() || !exportFolder.isDirectory()) {
            System.err.println("Export folder couldn't be created or is not a directory!");
            System.exit(3);
        }

        Thread folderScan = searchCasesinFolder(folderToScan);

        AtomicInteger counter = new AtomicInteger();
        AtomicInteger exported = new AtomicInteger();
        ExecutorService executor = Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());

        boolean initialized = false;
        while (folderScan.isAlive() || !cases.isEmpty()) {
            File caseDir = cases.poll();
            if (caseDir == null) {
                // Nothing queued yet: back off briefly instead of spinning on the queue.
                try {
                    Thread.sleep(IDLE_POLL_MILLIS);
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    break;
                }
                continue;
            }
            if (!initialized) {
                // initialize sleuthkit using just one thread
                System.out.println("Initializing from case " + caseDir.getAbsolutePath());
                try {
                    IPEDSource ipedCase = new IPEDSource(caseDir, null, false);
                    ipedCase.close();
                    initialized = true;
                } catch (RuntimeException e) {
                    // A case that cannot be opened (e.g. its image is missing) must not
                    // abort the whole crawl: report it, skip it and retry the
                    // single-threaded initialization with the next case.
                    e.printStackTrace();
                    continue;
                }
            }
            // The case number is taken when the task starts running, not when it is queued.
            executor.execute(() -> exportCase(caseDir, counter.incrementAndGet(), query, exportFolder, exported));
        }

        // No more cases will be submitted: let the pool drain and block until it is done.
        executor.shutdown();
        try {
            executor.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        System.out.println("Exported " + exported + " files.");
        System.exit(0);

    }

    /**
     * Runs {@code query} against one case and copies every hit into
     * {@code exportFolder/case_<caseNum>}. Failures are printed and do not
     * propagate, so one broken case or item does not stop the others.
     *
     * @param caseDir
     *            root folder of the case to open
     * @param caseNum
     *            sequence number used in log messages and in the sub-folder name
     * @param query
     *            search expression passed to {@link IPEDSearcher}
     * @param exportFolder
     *            root folder receiving the exported files
     * @param exported
     *            counter of successfully exported files, shared by all cases
     */
    private static void exportCase(File caseDir, int caseNum, String query, File exportFolder, AtomicInteger exported) {
        System.out.println("Searching for files into case " + caseNum + ": " + caseDir.getAbsolutePath());
        try (IPEDSource ipedCase = new IPEDSource(caseDir, null, false)) {
            IPEDSearcher searcher = new IPEDSearcher(ipedCase, query);
            int[] itemIds = searcher.search().getIds();
            System.out.println("Found " + itemIds.length + " files.");
            if (itemIds.length == 0) {
                return;
            }
            System.out.println("Exporting...");
            File parentDir = new File(exportFolder, "case_" + caseNum);
            parentDir.mkdirs();
            for (int id : itemIds) {
                IItem item = ipedCase.getItemByID(id);
                File target = reserveTarget(parentDir, Util.getValidFilename(Util.getNameWithTrueExt(item)));
                try (InputStream in = item.getBufferedInputStream()) {
                    Files.copy(in, target.toPath(), StandardCopyOption.REPLACE_EXISTING);
                    exported.getAndIncrement();
                } catch (Exception e0) {
                    // Best effort: a single unreadable item must not stop the export.
                    e0.printStackTrace();
                }
            }
            // Running total across all cases, not only this one.
            System.out.println("Exported " + exported + " files.");

        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Picks a file in {@code parentDir} that does not exist yet and creates it
     * empty. When {@code fileName} is taken, an increasing number is inserted
     * before the last dot (or appended when there is no dot).
     *
     * @param parentDir
     *            folder that will hold the file
     * @param fileName
     *            preferred file name
     * @return the newly created, empty file
     * @throws IOException
     *             if the file cannot be created
     */
    private static File reserveTarget(File parentDir, String fileName) throws IOException {
        File target = new File(parentDir, fileName);
        String name = target.getName();
        int dot = name.lastIndexOf('.');
        String base = dot > -1 ? name.substring(0, dot) : name;
        String ext = dot > -1 ? name.substring(dot) : "";
        int suffix = 0;
        synchronized (TARGET_LOCK) {
            while (target.exists()) {
                target = new File(parentDir, base + (++suffix) + ext);
            }
            target.createNewFile();
        }
        return target;
    }

    /**
     * Starts a thread that recursively scans {@code folder} for cases, queuing
     * each one found, and prints the total when the scan ends.
     *
     * @param folder
     *            root of the tree to scan
     * @return the already started scanning thread
     */
    private static Thread searchCasesinFolder(File folder) {
        Thread t = new Thread() {
            @Override
            public void run() {
                recurse(folder);
                System.out.println("Cases found: " + numCases.get());
            }
        };
        t.start();
        return t;
    }

    /**
     * Tells whether {@code folder} matches one of the hard-coded patterns that
     * the scan does not descend into: an export folder sitting next to a case
     * module folder, a report folder containing {@code thumbs}, or an
     * {@code indexador} folder containing an index.
     */
    private static boolean isKnownFolder(File folder) {
        String name = folder.getName();
        File parent = folder.getParentFile();
        boolean exportDir = (name.equals("Exportados") || name.equals("Exported"))
                && (new File(parent, IPEDSource.MODULE_DIR).exists() || new File(parent, "indexador").exists());
        boolean reportDir = (name.equals("report") || name.equals("relatorio")) && new File(folder, "thumbs").exists();
        boolean moduleDir = name.equals("indexador") && new File(folder, IPEDSource.INDEX_DIR).exists();
        return exportDir || reportDir || moduleDir;
    }

    /**
     * Depth-first scan. A folder containing the module/index sub-folder is
     * queued as a case and not descended into; any other folder has its
     * sub-directories visited.
     */
    private static void recurse(File folder) {
        if (SKIP_KNOWN_FOLDERS && isKnownFolder(folder)) {
            return;
        }
        if (new File(folder, IPEDSource.MODULE_DIR + "/" + IPEDSource.INDEX_DIR).exists()) {
            System.out.println("Case found in " + folder.getAbsolutePath());
            cases.add(folder);
            numCases.incrementAndGet();
            return;
        }
        System.out.println("Searching for cases in " + folder.getAbsolutePath());
        File[] subFiles = folder.listFiles();
        if (subFiles == null) {
            // Not listable (I/O error or access denied): nothing to descend into.
            return;
        }
        for (File file : subFiles) {
            if (file.isDirectory()) {
                recurse(file);
            }
        }
    }

}

Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

Expand Down Expand Up @@ -498,7 +498,7 @@ private void parseChromeSearches(ContentHandler handler, Metadata metadata, Pars

protected List<ResumedVisit> getResumedHistory(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<ResumedVisit> resumedHistory = new LinkedList<ResumedVisit>();
List<ResumedVisit> resumedHistory = new ArrayList<ResumedVisit>();

Statement st = null;
try {
Expand All @@ -524,7 +524,7 @@ protected List<ResumedVisit> getResumedHistory(Connection connection, Metadata m

protected List<Visit> getHistory(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<Visit> history = new LinkedList<Visit>();
List<Visit> history = new ArrayList<Visit>();

Statement st = null;
try {
Expand All @@ -549,7 +549,7 @@ protected List<Visit> getHistory(Connection connection, Metadata metadata, Parse

protected List<Download> getDownloads(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<Download> downloads = new LinkedList<Download>();
List<Download> downloads = new ArrayList<Download>();

Statement st = null;
try {
Expand Down Expand Up @@ -582,7 +582,7 @@ protected List<Download> getDownloads(Connection connection, Metadata metadata,

protected List<Search> getSearchTerms(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<Search> searches = new LinkedList<Search>();
List<Search> searches = new ArrayList<Search>();

Statement st = null;
try {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.StandardCopyOption;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

Expand Down Expand Up @@ -287,7 +287,7 @@ private void parseEdgeHistory(ContentHandler handler, Metadata metadata, ParseCo

protected List<EdgeContainer> getHistory(String filePath, PointerByReference filePointerReference, ItemInfo itemInfo)
throws EdgeWebCacheException {
List<EdgeContainer> history = new LinkedList<EdgeContainer>();
List<EdgeContainer> history = new ArrayList<EdgeContainer>();

try {
/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.LinkedList;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;

Expand Down Expand Up @@ -504,7 +504,7 @@ private void parseFirefoxResumedHistory(ContentHandler handler, Metadata metadat

protected List<ResumedVisit> getResumedHistory(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<ResumedVisit> resumedHistory = new LinkedList<ResumedVisit>();
List<ResumedVisit> resumedHistory = new ArrayList<ResumedVisit>();

Statement st = null;
try {
Expand All @@ -531,7 +531,7 @@ protected List<ResumedVisit> getResumedHistory(Connection connection, Metadata m

protected List<Visit> getHistory(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<Visit> history = new LinkedList<Visit>();
List<Visit> history = new ArrayList<Visit>();

Statement st = null;
try {
Expand All @@ -555,7 +555,7 @@ protected List<Visit> getHistory(Connection connection, Metadata metadata, Parse

protected List<FirefoxMozBookmark> getBookmarks(Connection connection, Metadata metadata, ParseContext context)
throws SQLException {
List<FirefoxMozBookmark> bookmarks = new LinkedList<FirefoxMozBookmark>();
List<FirefoxMozBookmark> bookmarks = new ArrayList<FirefoxMozBookmark>();

Statement st = null;
try {
Expand All @@ -580,7 +580,7 @@ protected List<FirefoxMozBookmark> getBookmarks(Connection connection, Metadata

private List<Download> getDownloads(Connection connection, Metadata metadata, ParseContext context)
throws SQLException, JsonParseException, JsonMappingException, IOException {
List<Download> downloads = new LinkedList<Download>();
List<Download> downloads = new ArrayList<Download>();

Statement st = null;
try {
Expand Down
Loading

0 comments on commit 12320a4

Please sign in to comment.